Search in sources :

Example 76 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class ConvertFrameBlockToIJVLines method call.

/**
 * Turns one (row offset, frame block) pair into text lines of the form
 * {@code "<row> <col> <value>"}, using 1-based column indexes.
 *
 * <p>When the row offset equals 1, two extra lines are emitted first for every
 * column whose metadata is not the default: {@code "-1 <col> <numDistinct>"}
 * and {@code "-2 <col> <mvValue>"}. Cells whose string value is {@code null}
 * produce no line.
 *
 * @param kv pair of row offset and frame block
 * @return iterator over the generated lines
 * @throws Exception as declared by the implemented interface
 */
@Override
public Iterator<String> call(Tuple2<Long, FrameBlock> kv) throws Exception {
    long firstRow = kv._1;
    FrameBlock frame = kv._2;
    ArrayList<String> lines = new ArrayList<>();

    // frame meta data: only emitted together with the block at row offset 1
    if (firstRow == 1) {
        for (int col = 0; col < frame.getNumColumns(); col++) {
            if (frame.isColumnMetadataDefault(col)) {
                continue;
            }
            lines.add("-1 " + (col + 1) + " " + frame.getColumnMetadata(col).getNumDistinct());
            lines.add("-2 " + (col + 1) + " " + frame.getColumnMetadata(col).getMvValue());
        }
    }

    // cell data: one "row col value" triple per non-null cell
    Iterator<String[]> rows = frame.getStringRowIterator();
    StringBuilder buffer = new StringBuilder();
    int rowInBlock = 0;
    while (rows.hasNext()) {
        String rowLabel = Long.toString(firstRow + rowInBlock);
        String[] values = rows.next();
        for (int col = 0; col < values.length; col++) {
            if (values[col] == null) {
                continue;
            }
            buffer.append(rowLabel).append(' ').append(col + 1).append(' ').append(values[col]);
            lines.add(buffer.toString());
            // reuse the same builder for the next cell
            buffer.setLength(0);
        }
        rowInBlock++;
    }
    return lines.iterator();
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) ArrayList(java.util.ArrayList)

Example 77 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameReader method createOutputFrameBlock.

/**
 * Builds the output frame block for the given schema and column names and
 * calls {@code ensureAllocatedColumns} on it with the requested row count.
 *
 * <p>The schema/names pair is first checked via
 * {@code OptimizerUtils.isValidCPDimensions}; a failed check raises a
 * {@code DMLRuntimeException}. Note that {@code nrow} is narrowed to
 * {@code int} before it is handed to the frame block.
 *
 * @param schema schema as array of ValueTypes
 * @param names column names
 * @param nrow number of rows
 * @return frame block
 * @throws IOException if IOException occurs
 */
protected static FrameBlock createOutputFrameBlock(ValueType[] schema, String[] names, long nrow) throws IOException {
    // validate schema against column names before allocating anything
    boolean dimsOk = OptimizerUtils.isValidCPDimensions(schema, names);
    if (!dimsOk) {
        throw new DMLRuntimeException("Schema and names to be define with equal size.");
    }

    // create the result block and allocate its columns
    FrameBlock result = new FrameBlock(schema, names);
    result.ensureAllocatedColumns((int) nrow);
    return result;
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 78 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameReaderBinaryBlock method readFrameFromHDFS.

/**
 * Reads a frame from the given file into a newly created frame block.
 *
 * <p>Steps, in order: derive the output schema and names for {@code clen}
 * columns, create the output block for {@code rlen} rows, resolve the file
 * system for the path, validate the input file, and delegate the actual read
 * to {@code readBinaryBlockFrameFromHDFS}.
 *
 * @param fname file name
 * @param schema schema as array of ValueTypes
 * @param names column names
 * @param rlen number of rows
 * @param clen number of columns
 * @return frame block filled by the read
 * @throws IOException if IOException occurs
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
@Override
public final FrameBlock readFrameFromHDFS(String fname, ValueType[] schema, String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
    // output allocation
    ValueType[] outSchema = createOutputSchema(schema, clen);
    String[] outNames = createOutputNames(names, clen);
    FrameBlock result = createOutputFrameBlock(outSchema, outNames, rlen);

    // file access setup
    JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    Path input = new Path(fname);
    FileSystem fileSystem = IOUtilFunctions.getFileSystem(input, conf);

    // input validation (existence and non-empty file)
    checkValidInputFile(fileSystem, input);

    // core read (sequential/parallel) fills the pre-created block
    readBinaryBlockFrameFromHDFS(input, conf, fileSystem, result, rlen, clen);
    return result;
}
Also used : Path(org.apache.hadoop.fs.Path) ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf)

Example 79 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameReaderBinaryBlock method readFirstBlock.

/**
 * Specific functionality of FrameReaderBinaryBlock, mostly used for testing.
 *
 * <p>Opens the first path returned by
 * {@code IOUtilFunctions.getSequenceFilePaths} for the given file name and
 * issues a single {@code next} call on a sequence file reader, reading into a
 * fresh frame block. The boolean result of that call is not inspected, and the
 * reader is always closed via {@code IOUtilFunctions.closeSilently}.
 *
 * @param fname file name
 * @return frame block
 * @throws IOException if IOException occurs
 */
@SuppressWarnings("deprecation")
public FrameBlock readFirstBlock(String fname) throws IOException {
    // file access setup
    JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    Path input = new Path(fname);
    FileSystem fileSystem = IOUtilFunctions.getFileSystem(input, conf);

    // locate the first sequence file under the input path
    Path[] parts = IOUtilFunctions.getSequenceFilePaths(fileSystem, input);
    Path firstPart = parts[0];

    // reusable key/value holders for the single read
    LongWritable rowKey = new LongWritable();
    FrameBlock firstBlock = new FrameBlock();

    SequenceFile.Reader in = new SequenceFile.Reader(fileSystem, firstPart, conf);
    try {
        in.next(rowKey, firstBlock);
    } finally {
        // close without surfacing close-time failures
        IOUtilFunctions.closeSilently(in);
    }
    return firstBlock;
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFile(org.apache.hadoop.io.SequenceFile) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FileSystem(org.apache.hadoop.fs.FileSystem) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf)

Example 80 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameWriter method createFrameBlocksForReuse.

/**
 * Creates a one-element array holding a new frame block built from the given
 * schema and column names.
 *
 * @param schema schema as array of ValueTypes
 * @param names column names
 * @param rlen number of rows (not used by this method body)
 * @return array containing exactly one frame block
 */
public static FrameBlock[] createFrameBlocksForReuse(ValueType[] schema, String[] names, long rlen) {
    return new FrameBlock[] { new FrameBlock(schema, names) };
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock)

Aggregations

FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock) 90, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 28, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics) 26, ValueType (org.apache.sysml.parser.Expression.ValueType) 23, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock) 23, FrameReader (org.apache.sysml.runtime.io.FrameReader) 18, IOException (java.io.IOException) 16, RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) 16, FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject) 15, LongWritable (org.apache.hadoop.io.LongWritable) 12, JavaPairRDD (org.apache.spark.api.java.JavaPairRDD) 11, CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) 11, FrameWriter (org.apache.sysml.runtime.io.FrameWriter) 9, TestConfiguration (org.apache.sysml.test.integration.TestConfiguration) 8, Text (org.apache.hadoop.io.Text) 7, SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) 7, RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject) 7, ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) 6, CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) 5, MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat) 5