Search in sources :

Example 6 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class MLContextConversionUtil method javaRDDStringIJVToMatrixObject.

/**
 * Wrap a {@code JavaRDD<String>} holding IJV-formatted lines in a
 * {@code MatrixObject}. Note that metadata is required for IJV format.
 * <p>
 * The resulting object is created with value type {@code DOUBLE}, a unique
 * temporary file name, and text-cell input/output formats. If
 * {@code matrixMetadata} is {@code null}, a default-constructed
 * {@code MatrixCharacteristics} is used.
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param matrixMetadata
 *            matrix metadata
 * @return the {@code JavaRDD<String>} converted to a {@code MatrixObject}
 */
public static MatrixObject javaRDDStringIJVToMatrixObject(JavaRDD<String> javaRDD, MatrixMetadata matrixMetadata) {
    // Key each input line so it fits the (LongWritable, Text) pair layout.
    JavaPairRDD<LongWritable, Text> keyedLines = javaRDD.mapToPair(new ConvertStringToLongTextPair());

    MatrixCharacteristics characteristics;
    if (matrixMetadata == null) {
        characteristics = new MatrixCharacteristics();
    } else {
        characteristics = matrixMetadata.asMatrixCharacteristics();
    }

    String tempFileName = OptimizerUtils.getUniqueTempFileName();
    MetaDataFormat textCellFormat = new MetaDataFormat(characteristics, OutputInfo.TextCellOutputInfo, InputInfo.TextCellInputInfo);
    MatrixObject result = new MatrixObject(ValueType.DOUBLE, tempFileName, textCellFormat);

    // Pass the keyed lines through CopyTextInputFunction before attaching them.
    JavaPairRDD<LongWritable, Text> copiedLines = keyedLines.mapToPair(new CopyTextInputFunction());
    result.setRDDHandle(new RDDObject(copiedLines));
    return result;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) CopyTextInputFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ConvertStringToLongTextPair(org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 7 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class MLContextConversionUtil method binaryBlocksToFrameObject.

/**
 * Wrap a {@code JavaPairRDD<Long, FrameBlock>} in a {@code FrameObject}.
 * <p>
 * When {@code frameMetadata} is supplied, both the matrix characteristics and
 * the column schema are taken from it. Otherwise default-constructed
 * characteristics are used and every column is typed as {@code STRING}.
 *
 * @param binaryBlocks
 *            {@code JavaPairRDD<Long, FrameBlock>} representation of a
 *            binary-block frame
 * @param frameMetadata
 *            the frame metadata
 * @return the {@code JavaPairRDD<Long, FrameBlock>} frame converted to a
 *         {@code FrameObject}
 */
public static FrameObject binaryBlocksToFrameObject(JavaPairRDD<Long, FrameBlock> binaryBlocks, FrameMetadata frameMetadata) {
    MatrixCharacteristics characteristics;
    ValueType[] columnTypes;
    if (frameMetadata == null) {
        characteristics = new MatrixCharacteristics();
        // No schema available: treat every column as a string column.
        columnTypes = UtilFunctions.nCopies((int) characteristics.getCols(), ValueType.STRING);
    } else {
        characteristics = frameMetadata.asMatrixCharacteristics();
        columnTypes = frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0]);
    }

    String tempFileName = OptimizerUtils.getUniqueTempFileName();
    MetaDataFormat binaryBlockFormat = new MetaDataFormat(characteristics, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
    FrameObject result = new FrameObject(tempFileName, binaryBlockFormat, columnTypes);
    result.setRDDHandle(new RDDObject(binaryBlocks));
    return result;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) ValueType(org.apache.sysml.parser.Expression.ValueType) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 8 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class MLContextConversionUtil method javaRDDStringCSVToFrameObject.

/**
 * Convert a {@code JavaRDD<String>} in CSV format to a {@code FrameObject}.
 * <p>
 * The CSV lines are converted to binary blocks using {@code ","} as the
 * delimiter. When {@code frameMetadata} is {@code null}, default-constructed
 * matrix characteristics are used and every column is typed as
 * {@code STRING}, matching the fallback in {@code binaryBlocksToFrameObject}.
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param frameMetadata
 *            frame metadata; may be {@code null}
 * @return the {@code JavaRDD<String>} converted to a {@code FrameObject}, or
 *         {@code null} if the CSV-to-binary-block conversion fails with a
 *         {@code DMLRuntimeException}
 */
public static FrameObject javaRDDStringCSVToFrameObject(JavaRDD<String> javaRDD, FrameMetadata frameMetadata) {
    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    MatrixCharacteristics mc = (frameMetadata != null) ? frameMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
    JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());

    // The metadata is optional (see the null check above), so the schema must
    // not be read from it unconditionally; previously a null frameMetadata
    // always ended in a NullPointerException here.
    ValueType[] schema;
    if (frameMetadata != null) {
        schema = frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0]);
    } else {
        // NOTE(review): if the column count is not known yet, the fallback
        // schema is empty - confirm csvToBinaryBlock tolerates that.
        schema = new ValueType[(int) Math.max(0, mc.getCols())];
        java.util.Arrays.fill(schema, ValueType.STRING);
    }

    FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo), schema);
    JavaPairRDD<Long, FrameBlock> rdd;
    try {
        rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc(), javaPairRDDText, mc, frameObject.getSchema(), false, ",", false, -1);
    } catch (DMLRuntimeException e) {
        // Existing contract kept for callers: report the failure and return null.
        e.printStackTrace();
        return null;
    }
    frameObject.setRDDHandle(new RDDObject(rdd));
    return frameObject;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) ValueType(org.apache.sysml.parser.Expression.ValueType) Text(org.apache.hadoop.io.Text) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CopyTextInputFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) ConvertStringToLongTextPair(org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) LongWritable(org.apache.hadoop.io.LongWritable)

Example 9 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class ReblockSPInstruction method processInstruction.

/**
 * Executes the reblock instruction: sets the output characteristics from the
 * input, reads the source format from the input's metadata, and then runs
 * either an in-memory reblock or the matrix/frame reblock.
 *
 * @param ec
 *            execution context; cast to {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // set the output characteristics
    CacheableData<?> inputData = sec.getCacheableData(input1.getName());
    MatrixCharacteristics inChars = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics outChars = sec.getMatrixCharacteristics(output.getName());
    outChars.set(inChars.getRows(), inChars.getCols(), brlen, bclen, inChars.getNonZeros());

    // get the source format from the meta data
    MetaDataFormat sourceMeta = (MetaDataFormat) inputData.getMetaData();
    if (sourceMeta == null) {
        throw new DMLRuntimeException("Error: Metadata not found");
    }
    InputInfo sourceFormat = sourceMeta.getInputInfo();

    // check for in-memory reblock (w/ lazy spark context, potential for latency reduction)
    boolean inMemory = Recompiler.checkCPReblock(sec, input1.getName());
    if (inMemory) {
        if (input1.getDataType() == DataType.MATRIX) {
            Recompiler.executeInMemoryMatrixReblock(sec, input1.getName(), output.getName());
        } else if (input1.getDataType() == DataType.FRAME) {
            Recompiler.executeInMemoryFrameReblock(sec, input1.getName(), output.getName());
        }
        return;
    }

    // execute matrix/frame reblock
    if (input1.getDataType() == DataType.MATRIX) {
        processMatrixReblockInstruction(sec, sourceFormat);
    } else if (input1.getDataType() == DataType.FRAME) {
        processFrameReblockInstruction(sec, sourceFormat);
    }
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 10 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class ProgramConverter method serializeDataObject.

/**
 * Serializes a data object into a single delimited string.
 * <p>
 * SCHEMA: {@code <name>|<datatype>|<valuetype>|value}, followed for matrices
 * by nine metadata fields: rows, columns, rows per block, columns per block,
 * non-zeros, input info, output info, partition format and update type.
 * Scalars are serialized by value, matrices by file name.
 *
 * @param key
 *            name written as the first field
 * @param dat
 *            data object to serialize; must be a scalar or a matrix
 * @return the fields joined with {@code DATA_FIELD_DELIM}
 * @throws DMLRuntimeException
 *             if the data type is neither scalar nor matrix
 */
public static String serializeDataObject(String key, Data dat) {
    // prepare data for serialization
    DataType dataType = dat.getDataType();
    ValueType valueType = dat.getValueType();
    String payload = null;
    String[] matrixFields = null;
    switch(dataType) {
        case SCALAR: {
            ScalarObject scalar = (ScalarObject) dat;
            payload = scalar.getStringValue();
            break;
        }
        case MATRIX: {
            MatrixObject matrix = (MatrixObject) dat;
            MetaDataFormat format = (MetaDataFormat) dat.getMetaData();
            MatrixCharacteristics chars = format.getMatrixCharacteristics();
            payload = matrix.getFileName();
            PartitionFormat partitioning;
            if (matrix.getPartitionFormat() == null) {
                partitioning = PartitionFormat.NONE;
            } else {
                partitioning = new PartitionFormat(matrix.getPartitionFormat(), matrix.getPartitionSize());
            }
            matrixFields = new String[] {
                String.valueOf(chars.getRows()),
                String.valueOf(chars.getCols()),
                String.valueOf(chars.getRowsPerBlock()),
                String.valueOf(chars.getColsPerBlock()),
                String.valueOf(chars.getNonZeros()),
                InputInfo.inputInfoToString(format.getInputInfo()),
                OutputInfo.outputInfoToString(format.getOutputInfo()),
                String.valueOf(partitioning),
                String.valueOf(matrix.getUpdateType())
            };
            break;
        }
        default:
            throw new DMLRuntimeException("Unable to serialize datatype " + dataType);
    }

    // serialize data
    StringBuilder out = new StringBuilder();
    out.append(key).append(DATA_FIELD_DELIM);
    out.append(dataType).append(DATA_FIELD_DELIM);
    out.append(valueType).append(DATA_FIELD_DELIM);
    out.append(payload);
    if (matrixFields != null) {
        for (String field : matrixFields) {
            out.append(DATA_FIELD_DELIM);
            out.append(field);
        }
    }
    return out.toString();
}
Also used : ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ValueType(org.apache.sysml.parser.Expression.ValueType) DataType(org.apache.sysml.parser.Expression.DataType) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)54 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)47 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)28 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)26 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)17 IOException (java.io.IOException)12 ValueType (org.apache.sysml.parser.Expression.ValueType)10 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)10 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)9 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)9 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)7 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)5 Path (org.apache.hadoop.fs.Path)4 LongWritable (org.apache.hadoop.io.LongWritable)4 Text (org.apache.hadoop.io.Text)4 Data (org.apache.sysml.runtime.instructions.cp.Data)4 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)4 ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair)4 CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction)4 DataOp (org.apache.sysml.hops.DataOp)3