Search in sources:

Example 11 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class FrameReaderTextCSV method readFrameFromInputStream.

/**
 * Reads a frame in text CSV format from the given input stream.
 *
 * <p>The output schema and column names are derived from the caller-provided
 * ones via {@code createOutputSchema} / {@code createOutputNames}; the output
 * block is allocated with these derived values and the same derived values are
 * handed to the core read, so the reader and the destination block agree.
 *
 * @param is     input stream providing the CSV data
 * @param schema caller-provided value types of the columns
 * @param names  caller-provided column names
 * @param rlen   number of rows
 * @param clen   number of columns
 * @return the frame block populated by the core read
 * @throws IOException         if reading fails
 * @throws DMLRuntimeException declared by the reader contract
 */
@Override
public FrameBlock readFrameFromInputStream(InputStream is, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // allocate output frame block from the derived schema and names
    ValueType[] lschema = createOutputSchema(schema, clen);
    String[] lnames = createOutputNames(names, clen);
    FrameBlock ret = createOutputFrameBlock(lschema, lnames, rlen);
    // core read: expose the stream as a single input split
    InputStreamInputFormat informat = new InputStreamInputFormat(is);
    InputSplit split = informat.getSplits(null, 1)[0];
    // Pass the derived lschema/lnames (not the raw arguments), matching what
    // readFrameFromHDFS hands to its core read and what 'ret' was allocated with.
    // NOTE(review): the trailing '0, true' presumably mean "row offset 0, first
    // split" -- confirm against readCSVFrameFromInputSplit.
    readCSVFrameFromInputSplit(split, informat, null, ret, lschema, lnames, rlen, clen, 0, true);
    return ret;
}
Also used : InputStreamInputFormat(org.apache.sysml.runtime.util.InputStreamInputFormat) ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) InputSplit(org.apache.hadoop.mapred.InputSplit)

Example 12 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class FrameReaderTextCSV method readFrameFromHDFS.

/**
 * Reads a frame in text CSV format from the file named {@code fname}.
 *
 * <p>When either dimension is non-positive, both dimensions are determined
 * from the file via {@code computeCSVSize} before the output is allocated.
 *
 * @param fname  name of the input file
 * @param schema caller-provided value types of the columns
 * @param names  caller-provided column names
 * @param rlen   number of rows, or a non-positive value if unknown
 * @param clen   number of columns, or a non-positive value if unknown
 * @return the frame block populated by the core read
 * @throws IOException         if the file cannot be accessed or read
 * @throws DMLRuntimeException declared by the reader contract
 */
@Override
public final FrameBlock readFrameFromHDFS(String fname, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // set up job configuration, input path and file system handle
    final JobConf jobConf = new JobConf(ConfigurationManager.getCachedJobConf());
    final Path inputPath = new Path(fname);
    final FileSystem fileSystem = IOUtilFunctions.getFileSystem(inputPath, jobConf);
    FileInputFormat.addInputPath(jobConf, inputPath);

    // reject missing or empty input before doing any further work
    checkValidInputFile(fileSystem, inputPath);

    // resolve the frame dimensions, scanning the file only if they are unknown
    long numRows = rlen;
    long numCols = clen;
    if (!(numRows > 0 && numCols > 0)) {
        Pair<Integer, Integer> dims = computeCSVSize(inputPath, jobConf, fileSystem);
        numRows = dims.getKey();
        numCols = dims.getValue();
    }

    // build the output block from the derived schema and column names
    final ValueType[] outSchema = createOutputSchema(schema, numCols);
    final String[] outNames = createOutputNames(names, numCols);
    final FrameBlock result = createOutputFrameBlock(outSchema, outNames, numRows);

    // delegate to the core read (sequential/parallel)
    readCSVFrameFromHDFS(inputPath, jobConf, fileSystem, result, outSchema, outNames, numRows, numCols);
    return result;
}
Also used : Path(org.apache.hadoop.fs.Path) ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf)

Example 13 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class FrameReaderTextCell method readFrameFromHDFS.

/**
 * Reads a frame in text cell format from the file named {@code fname}.
 *
 * @param fname  name of the input file
 * @param schema caller-provided value types of the columns
 * @param names  caller-provided column names
 * @param rlen   number of rows
 * @param clen   number of columns
 * @return the frame block populated by the core read
 * @throws IOException         if the file cannot be accessed or read
 * @throws DMLRuntimeException declared by the reader contract
 */
@Override
public final FrameBlock readFrameFromHDFS(String fname, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // build the output block from the derived schema and column names
    final ValueType[] outSchema = createOutputSchema(schema, clen);
    final String[] outNames = createOutputNames(names, clen);
    final FrameBlock result = createOutputFrameBlock(outSchema, outNames, rlen);

    // set up job configuration, input path and file system handle
    final JobConf jobConf = new JobConf(ConfigurationManager.getCachedJobConf());
    final Path inputPath = new Path(fname);
    final FileSystem fileSystem = IOUtilFunctions.getFileSystem(inputPath, jobConf);

    // reject missing or empty input
    checkValidInputFile(fileSystem, inputPath);

    // delegate to the core read (sequential/parallel)
    readTextCellFrameFromHDFS(inputPath, jobConf, fileSystem, result, outSchema, outNames, rlen, clen);
    return result;
}
Also used : Path(org.apache.hadoop.fs.Path) ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf)

Example 14 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class FrameReaderTextCell method readTextCellFrameFromInputSplit.

/**
 * Reads all text-cell records of one input split into the given frame block.
 *
 * <p>Each record is tokenized on a single space into a row index, a column
 * index and a value; both indexes are converted from 1-based to 0-based.
 * After conversion, row -3 stores the value as the column's "mv value"
 * metadata, row -2 stores it as the column's number of distinct values, and
 * any other row is written as a data cell parsed according to the column's
 * value type.
 *
 * @param split    input split to read
 * @param informat input format used to create the record reader
 * @param job      job configuration passed to the record reader
 * @param dest     destination frame block; its schema and dimensions are used
 * @throws IOException if a record cannot be read, parsed or stored; the
 *                     original failure is always attached as the cause
 */
protected static void readTextCellFrameFromInputSplit(InputSplit split, TextInputFormat informat, JobConf job, FrameBlock dest) throws IOException {
    ValueType[] schema = dest.getSchema();
    int rlen = dest.getNumRows();
    int clen = dest.getNumColumns();
    // create record reader
    RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
    LongWritable key = new LongWritable();
    Text value = new Text();
    FastStringTokenizer st = new FastStringTokenizer(' ');
    // kept outside the try block so the catch handler can report the last parsed position
    int row = -1;
    int col = -1;
    try {
        while (reader.next(key, value)) {
            // reinit tokenizer
            st.reset(value.toString());
            row = st.nextInt() - 1;
            col = st.nextInt() - 1;
            if (row == -3) {
                dest.getColumnMetadata(col).setMvValue(st.nextToken());
            } else if (row == -2) {
                dest.getColumnMetadata(col).setNumDistinct(st.nextLong());
            } else {
                dest.set(row, col, UtilFunctions.stringToObject(schema[col], st.nextToken()));
            }
        }
    } catch (Exception ex) {
        // post-mortem error handling and bounds checking;
        // rows -3 and -2 are valid metadata rows and must not be reported as out of range
        boolean metadataRow = (row == -3 || row == -2);
        boolean rowOutOfRange = !metadataRow && (row < 0 || row + 1 > rlen);
        boolean colOutOfRange = col < 0 || col + 1 > clen;
        if (rowOutOfRange || colOutOfRange) {
            // keep the root cause so the underlying failure stays diagnosable
            throw new IOException("Frame cell [" + (row + 1) + "," + (col + 1) + "] " + "out of overall frame range [1:" + rlen + ",1:" + clen + "].", ex);
        } else {
            throw new IOException("Unable to read frame in text cell format.", ex);
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
}
Also used : FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) ValueType(org.apache.sysml.parser.Expression.ValueType) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 15 with ValueType

use of org.apache.sysml.parser.Expression.ValueType in project incubator-systemml by apache.

the class FrameReaderTextCell method readFrameFromInputStream.

/**
 * Reads a frame in text cell format from the given input stream.
 *
 * @param is     input stream providing the text cell data
 * @param schema caller-provided value types of the columns
 * @param names  caller-provided column names
 * @param rlen   number of rows
 * @param clen   number of columns
 * @return the frame block populated by the core read
 * @throws IOException         if reading fails
 * @throws DMLRuntimeException declared by the reader contract
 */
@Override
public final FrameBlock readFrameFromInputStream(InputStream is, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // build the output block from the derived schema and column names
    final ValueType[] outSchema = createOutputSchema(schema, clen);
    final String[] outNames = createOutputNames(names, clen);
    final FrameBlock frame = createOutputFrameBlock(outSchema, outNames, rlen);

    // fill the block directly from the raw stream
    readRawTextCellFrameFromInputStream(is, frame, outSchema, outNames, rlen, clen);
    return frame;
}
Also used : ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock)

Aggregations

ValueType (org.apache.sysml.parser.Expression.ValueType): 55, FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 23, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 19, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 18, DataType (org.apache.sysml.parser.Expression.DataType): 11, MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat): 10, IOException (java.io.IOException): 9, LongWritable (org.apache.hadoop.io.LongWritable): 7, FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject): 7, RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject): 7, ArrayList (java.util.ArrayList): 6, Text (org.apache.hadoop.io.Text): 6, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 6, RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM): 5, ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair): 5, OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo): 5, TestConfiguration (org.apache.sysml.test.integration.TestConfiguration): 5, Row (org.apache.spark.sql.Row): 4, StructType (org.apache.spark.sql.types.StructType): 4, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 4