use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class MLContextConversionUtil method javaRDDStringIJVToMatrixObject.
/**
 * Wrap a {@code JavaRDD<String>} in IJV format in a {@code MatrixObject}.
 * The input is mapped through {@code ConvertStringToLongTextPair} and then
 * {@code CopyTextInputFunction}; the resulting pair RDD is attached to the
 * returned object as its RDD handle, described by text-cell format
 * metadata. Metadata is expected for IJV input.
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param matrixMetadata
 *            matrix metadata; if {@code null}, a default-constructed
 *            {@code MatrixCharacteristics} is used in its place
 * @return a {@code MatrixObject} of value type {@code DOUBLE} backed by the
 *         converted RDD
 */
public static MatrixObject javaRDDStringIJVToMatrixObject(JavaRDD<String> javaRDD, MatrixMetadata matrixMetadata) {
    JavaPairRDD<LongWritable, Text> keyedLines = javaRDD.mapToPair(new ConvertStringToLongTextPair());

    MatrixCharacteristics characteristics;
    if (matrixMetadata == null) {
        characteristics = new MatrixCharacteristics();
    } else {
        characteristics = matrixMetadata.asMatrixCharacteristics();
    }

    // The object is registered under a fresh temp file name with text-cell in/out formats.
    String tempFileName = OptimizerUtils.getUniqueTempFileName();
    MetaDataFormat textCellFormat = new MetaDataFormat(characteristics, OutputInfo.TextCellOutputInfo, InputInfo.TextCellInputInfo);
    MatrixObject result = new MatrixObject(ValueType.DOUBLE, tempFileName, textCellFormat);

    result.setRDDHandle(new RDDObject(keyedLines.mapToPair(new CopyTextInputFunction())));
    return result;
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class MLContextConversionUtil method binaryBlocksToFrameObject.
/**
 * Wrap a {@code JavaPairRDD<Long, FrameBlock>} in a {@code FrameObject}.
 * The RDD is attached unchanged as the object's RDD handle, described by
 * binary-block format metadata.
 *
 * @param binaryBlocks
 *            {@code JavaPairRDD<Long, FrameBlock>} representation of a
 *            binary-block frame
 * @param frameMetadata
 *            the frame metadata; if {@code null}, default-constructed
 *            {@code MatrixCharacteristics} are used and every column is
 *            typed as {@code STRING}
 * @return a {@code FrameObject} backed by {@code binaryBlocks}
 */
public static FrameObject binaryBlocksToFrameObject(JavaPairRDD<Long, FrameBlock> binaryBlocks, FrameMetadata frameMetadata) {
    final MatrixCharacteristics characteristics;
    final ValueType[] columnTypes;
    if (frameMetadata != null) {
        characteristics = frameMetadata.asMatrixCharacteristics();
        columnTypes = frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0]);
    } else {
        characteristics = new MatrixCharacteristics();
        // NOTE(review): the column count comes from default characteristics here;
        // confirm it is a valid (non-negative) size for nCopies.
        columnTypes = UtilFunctions.nCopies((int) characteristics.getCols(), ValueType.STRING);
    }

    String tempFileName = OptimizerUtils.getUniqueTempFileName();
    MetaDataFormat binaryBlockFormat = new MetaDataFormat(characteristics, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
    FrameObject result = new FrameObject(tempFileName, binaryBlockFormat, columnTypes);

    result.setRDDHandle(new RDDObject(binaryBlocks));
    return result;
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class MLContextConversionUtil method javaRDDStringCSVToFrameObject.
/**
 * Convert a {@code JavaRDD<String>} in CSV format to a {@code FrameObject}.
 * The strings are turned into a binary-block frame RDD via
 * {@code FrameRDDConverterUtils.csvToBinaryBlock}, using the schema taken
 * from the frame metadata, and that RDD is attached to the returned object
 * as its RDD handle.
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param frameMetadata
 *            frame metadata; required, because the frame schema is read
 *            from it
 * @return the {@code JavaRDD<String>} converted to a {@code FrameObject},
 *         or {@code null} if the conversion throws a
 *         {@code DMLRuntimeException}
 * @throws NullPointerException
 *             if {@code frameMetadata} is {@code null}
 */
public static FrameObject javaRDDStringCSVToFrameObject(JavaRDD<String> javaRDD, FrameMetadata frameMetadata) {
    // The schema below is always read from the metadata, so a null argument can
    // never succeed. Fail up front with a descriptive message instead of an
    // anonymous NullPointerException after the RDD transformations were set up.
    if (frameMetadata == null) {
        throw new NullPointerException("frameMetadata must not be null: the frame schema is required to convert CSV strings to a FrameObject");
    }

    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    MatrixCharacteristics mc = frameMetadata.asMatrixCharacteristics();
    JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());

    // NOTE(review): getFrameSchema() is dereferenced without a check; confirm that
    // metadata objects without a schema cannot reach this method.
    ValueType[] schema = frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0]);
    FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo), schema);

    JavaPairRDD<Long, FrameBlock> rdd;
    try {
        // NOTE(review): trailing arguments presumably mean "no header, ',' delimiter,
        // no fill, fill value -1" - confirm against csvToBinaryBlock's signature.
        rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc(), javaPairRDDText, mc, frameObject.getSchema(), false, ",", false, -1);
    } catch (DMLRuntimeException e) {
        // Kept for caller compatibility: the failure is reported on stderr and
        // signalled by a null return, so callers must check the result.
        e.printStackTrace();
        return null;
    }

    frameObject.setRDDHandle(new RDDObject(rdd));
    return frameObject;
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class ReblockSPInstruction method processInstruction.
/**
 * Executes this reblock instruction: copies the input's dimensions and
 * non-zero count to the output characteristics (with this instruction's
 * block sizes), then reblocks the input either in memory or through the
 * matrix/frame reblock routines, depending on the input's data type.
 *
 * @param ec
 *            the execution context; cast to {@code SparkExecutionContext}
 * @throws DMLRuntimeException
 *             if the input object carries no metadata
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sec = (SparkExecutionContext) ec;
    final String inName = input1.getName();

    // propagate the input dimensions to the output, using this instruction's block sizes
    CacheableData<?> inputData = sec.getCacheableData(inName);
    MatrixCharacteristics inChars = sec.getMatrixCharacteristics(inName);
    final String outName = output.getName();
    MatrixCharacteristics outChars = sec.getMatrixCharacteristics(outName);
    outChars.set(inChars.getRows(), inChars.getCols(), brlen, bclen, inChars.getNonZeros());

    // get the source format from the meta data
    MetaDataFormat formatMeta = (MetaDataFormat) inputData.getMetaData();
    if (formatMeta == null) {
        throw new DMLRuntimeException("Error: Metadata not found");
    }
    InputInfo sourceFormat = formatMeta.getInputInfo();

    final DataType inType = input1.getDataType();
    final boolean isMatrix = (inType == DataType.MATRIX);
    final boolean isFrame = !isMatrix && (inType == DataType.FRAME);

    // check for in-memory reblock (w/ lazy spark context, potential for latency reduction)
    if (Recompiler.checkCPReblock(sec, inName)) {
        if (isMatrix) {
            Recompiler.executeInMemoryMatrixReblock(sec, inName, outName);
        } else if (isFrame) {
            Recompiler.executeInMemoryFrameReblock(sec, inName, outName);
        }
        return;
    }

    // execute matrix/frame reblock; any other data type is silently ignored
    if (isMatrix) {
        processMatrixReblockInstruction(sec, sourceFormat);
    } else if (isFrame) {
        processFrameReblockInstruction(sec, sourceFormat);
    }
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class ProgramConverter method serializeDataObject.
/**
 * Serializes one named data object into a single delimited string.
 * <p>
 * Layout: {@code <name>|<datatype>|<valuetype>|<value>}, with fields joined
 * by {@code DATA_FIELD_DELIM}. Scalars are serialized by value, matrices by
 * file name. For matrices, nine further fields follow: rows, columns, rows
 * per block, columns per block, non-zeros, input info, output info,
 * partition format and update type.
 *
 * @param key
 *            the name written as the first field
 * @param dat
 *            the data object to serialize; must be a scalar or a matrix
 * @return the serialized representation
 * @throws DMLRuntimeException
 *             if the data type is neither {@code SCALAR} nor {@code MATRIX}
 */
public static String serializeDataObject(String key, Data dat) {
    final DataType datatype = dat.getDataType();
    final ValueType valuetype = dat.getValueType();

    String value = null;
    String[] matrixFields = null;
    switch (datatype) {
        case SCALAR: {
            value = ((ScalarObject) dat).getStringValue();
            break;
        }
        case MATRIX: {
            MatrixObject matrix = (MatrixObject) dat;
            MetaDataFormat format = (MetaDataFormat) dat.getMetaData();
            MatrixCharacteristics dims = format.getMatrixCharacteristics();
            value = matrix.getFileName();

            PartitionFormat partitioning;
            if (matrix.getPartitionFormat() == null) {
                partitioning = PartitionFormat.NONE;
            } else {
                partitioning = new PartitionFormat(matrix.getPartitionFormat(), matrix.getPartitionSize());
            }

            matrixFields = new String[] {
                String.valueOf(dims.getRows()),
                String.valueOf(dims.getCols()),
                String.valueOf(dims.getRowsPerBlock()),
                String.valueOf(dims.getColsPerBlock()),
                String.valueOf(dims.getNonZeros()),
                InputInfo.inputInfoToString(format.getInputInfo()),
                OutputInfo.outputInfoToString(format.getOutputInfo()),
                String.valueOf(partitioning),
                String.valueOf(matrix.getUpdateType())
            };
            break;
        }
        default:
            throw new DMLRuntimeException("Unable to serialize datatype " + datatype);
    }

    // common prefix: name, data type, value type, value
    StringBuilder out = new StringBuilder();
    out.append(key)
        .append(DATA_FIELD_DELIM).append(datatype)
        .append(DATA_FIELD_DELIM).append(valuetype)
        .append(DATA_FIELD_DELIM).append(value);

    // matrix-only suffix: one delimited field per metadata entry
    if (matrixFields != null) {
        for (String field : matrixFields) {
            out.append(DATA_FIELD_DELIM).append(field);
        }
    }
    return out.toString();
}
Aggregations