Use of org.apache.sysml.api.mlcontext.MLResults in project incubator-systemml by apache.
From class MLContextFrameTest, method testInputFrameAndMatrixOutputMatrix:
@Test
public void testInputFrameAndMatrixOutputMatrix() {
    System.out.println("MLContextFrameTest - input frame and matrix, output matrix");
    List<String> dataA = new ArrayList<String>();
    dataA.add("Test1,4.0");
    dataA.add("Test2,5.0");
    dataA.add("Test3,6.0");
    JavaRDD<String> javaRddStringA = sc.parallelize(dataA);
    ValueType[] schema = { ValueType.STRING, ValueType.DOUBLE };
    List<String> dataB = new ArrayList<String>();
    dataB.add("1.0");
    dataB.add("2.0");
    JavaRDD<String> javaRddStringB = sc.parallelize(dataB);
    JavaRDD<Row> javaRddRowA = FrameRDDConverterUtils.csvToRowRDD(sc, javaRddStringA, CSV_DELIM, schema);
    JavaRDD<Row> javaRddRowB = javaRddStringB.map(new CommaSeparatedValueStringToDoubleArrayRow());
    List<StructField> fieldsA = new ArrayList<StructField>();
    fieldsA.add(DataTypes.createStructField("1", DataTypes.StringType, true));
    fieldsA.add(DataTypes.createStructField("2", DataTypes.DoubleType, true));
    StructType schemaA = DataTypes.createStructType(fieldsA);
    Dataset<Row> dataFrameA = spark.createDataFrame(javaRddRowA, schemaA);
    List<StructField> fieldsB = new ArrayList<StructField>();
    fieldsB.add(DataTypes.createStructField("1", DataTypes.DoubleType, true));
    StructType schemaB = DataTypes.createStructType(fieldsB);
    Dataset<Row> dataFrameB = spark.createDataFrame(javaRddRowB, schemaB);
    String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: true ,recode: [ 1, 2 ]}\");\n"
        + "C = tA %*% B;\n"
        + "M = s * C;";
    Script script = dml(dmlString)
        .in("A", dataFrameA, new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length))
        .in("B", dataFrameB, new MatrixMetadata(MatrixFormat.CSV, dataFrameB.count(), (long) dataFrameB.columns().length))
        .in("s", 2)
        .out("M");
    MLResults results = ml.execute(script);
    double[][] matrix = results.getMatrixAs2DDoubleArray("M");
    Assert.assertEquals(6.0, matrix[0][0], 0.0);
    Assert.assertEquals(12.0, matrix[1][0], 0.0);
    Assert.assertEquals(18.0, matrix[2][0], 0.0);
}
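All of these excerpts reference fields supplied by the surrounding test harness (spark, sc, ml, CSV_DELIM) that the listing does not show. A minimal sketch of how such a harness might be initialized, assuming MLContext's JavaSparkContext constructor and a local Spark session (class, method, and field names here are illustrative):

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.apache.sysml.api.mlcontext.MLContext;

public abstract class MLContextTestHarness {
    // Shared handles referenced by the test methods in these excerpts.
    protected static SparkSession spark;
    protected static JavaSparkContext sc;
    protected static MLContext ml;
    protected static final String CSV_DELIM = ",";

    protected static void setUpSpark() {
        // Local Spark session for testing; appName and master are illustrative.
        spark = SparkSession.builder()
                .appName("MLContextTest")
                .master("local[*]")
                .getOrCreate();
        sc = new JavaSparkContext(spark.sparkContext());
        // MLContext wraps the Spark context to compile and run DML scripts.
        ml = new MLContext(sc);
    }
}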
Use of org.apache.sysml.api.mlcontext.MLResults in project incubator-systemml by apache.
From class MLContextOptLevelTest, method runMLContextOptLevelTest:
private void runMLContextOptLevelTest(int optLevel) {
    try {
        String s = "R = sum(matrix(0," + rows + "," + cols + ") + 7);";
        ml.setExplain(true);
        ml.setExplainLevel(ExplainLevel.RUNTIME);
        ml.setStatistics(true);
        ml.setConfigProperty(DMLConfig.OPTIMIZATION_LEVEL, String.valueOf(optLevel));
        Script script = dml(s).out("R");
        MLResults results = ml.execute(script);
        // check result correctness
        TestUtils.compareScalars(results.getDouble("R"), rows * cols * 7, 0.000001);
        // check correct opt level
        Assert.assertTrue(heavyHittersContainsString("+") == (optLevel == 1));
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
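heavyHittersContainsString is a helper inherited from the test base class, not part of the MLResults API; it checks whether a given opcode appears among the runtime heavy hitters collected when setStatistics(true) is enabled. A hedged sketch of such a helper, assuming Statistics.getCPHeavyHitterOpCodes() exposes the recorded opcodes (verify against your SystemML version):

import java.util.Set;

import org.apache.sysml.utils.Statistics;

public class HeavyHitterCheck {
    // Returns true if any of the given opcodes appears among the CP
    // heavy-hitter operations recorded for the last script execution.
    public static boolean heavyHittersContainsString(String... strings) {
        Set<String> opcodes = Statistics.getCPHeavyHitterOpCodes();
        for (String opcode : opcodes)
            for (String s : strings)
                if (opcode.equals(s))
                    return true;
        return false;
    }
}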
Use of org.apache.sysml.api.mlcontext.MLResults in project incubator-systemml by apache.
From class MLContextOutputBlocksizeTest, method runMLContextOutputBlocksizeTest:
private void runMLContextOutputBlocksizeTest(String format) {
    try {
        double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 76543);
        MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
        int blksz = ConfigurationManager.getBlocksize();
        MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, blksz, blksz, mbA.getNonZeros());
        // create input dataset
        JavaPairRDD<MatrixIndexes, MatrixBlock> in = SparkExecutionContext.toMatrixJavaPairRDD(sc, mbA, blksz, blksz);
        Matrix m = new Matrix(in, new MatrixMetadata(mc));
        ml.setExplain(true);
        ml.setExplainLevel(ExplainLevel.HOPS);
        // execute script
        String s = "if( sum(X) > 0 )" + " X = X/2;" + "R = X;"
            + "write(R, \"/tmp\", format=\"" + format + "\");";
        Script script = dml(s).in("X", m).out("R");
        MLResults results = ml.execute(script);
        // compare output matrix characteristics
        MatrixCharacteristics mcOut = results.getMatrix("R").getMatrixMetadata().asMatrixCharacteristics();
        Assert.assertEquals(blksz, mcOut.getRowsPerBlock());
        Assert.assertEquals(blksz, mcOut.getColsPerBlock());
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    }
}
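The assertions above inspect only block-size metadata. Continuing inside the same try block, the output could also be materialized for a value-level spot check; a minimal sketch reusing the getMatrixAs2DDoubleArray accessor from the first example (the tolerance is illustrative):

// The script halves X whenever sum(X) > 0 and leaves it unchanged otherwise,
// so compare one materialized cell against the corresponding input value.
double[][] R = results.getMatrixAs2DDoubleArray("R");
double expected = (mbA.sum() > 0) ? A[0][0] / 2 : A[0][0];
TestUtils.compareScalars(expected, R[0][0], 1e-9);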
Use of org.apache.sysml.api.mlcontext.MLResults in project incubator-systemml by apache.
From class MLContextParforDatasetTest, method runMLContextParforDatasetTest:
private void runMLContextParforDatasetTest(boolean vector, boolean unknownDims, boolean multiInputs) {
    // modify memory budget to trigger fused datapartition-execute
    long oldmem = InfrastructureAnalyzer.getLocalMaxMemory();
    // 1MB
    InfrastructureAnalyzer.setLocalMaxMemory(1 * 1024 * 1024);
    try {
        double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 76543);
        MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
        int blksz = ConfigurationManager.getBlocksize();
        MatrixCharacteristics mc1 = new MatrixCharacteristics(rows, cols, blksz, blksz, mbA.getNonZeros());
        MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);
        // create input dataset
        SparkSession sparkSession = SparkSession.builder().sparkContext(sc.sc()).getOrCreate();
        JavaPairRDD<MatrixIndexes, MatrixBlock> in = SparkExecutionContext.toMatrixJavaPairRDD(sc, mbA, blksz, blksz);
        Dataset<Row> df = RDDConverterUtils.binaryBlockToDataFrame(sparkSession, in, mc1, vector);
        MatrixMetadata mm = new MatrixMetadata(vector ? MatrixFormat.DF_VECTOR_WITH_INDEX : MatrixFormat.DF_DOUBLES_WITH_INDEX);
        mm.setMatrixCharacteristics(mc2);
        String s1 = "v = matrix(0, rows=nrow(X), cols=1)"
            + "parfor(i in 1:nrow(X), log=DEBUG) {"
            + " v[i, ] = sum(X[i, ]);"
            + "}"
            + "r = sum(v);";
        String s2 = "v = matrix(0, rows=nrow(X), cols=1)"
            + "Y = X;"
            + "parfor(i in 1:nrow(X), log=DEBUG) {"
            + " v[i, ] = sum(X[i, ]+Y[i, ]);"
            + "}"
            + "r = sum(v);";
        String s = multiInputs ? s2 : s1;
        ml.setExplain(true);
        ml.setExplainLevel(ExplainLevel.RUNTIME);
        ml.setStatistics(true);
        Script script = dml(s).in("X", df, mm).out("r");
        MLResults results = ml.execute(script);
        // compare aggregation results
        double sum1 = results.getDouble("r");
        double sum2 = mbA.sum() * (multiInputs ? 2 : 1);
        TestUtils.compareScalars(sum2, sum1, 0.000001);
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    } finally {
        InfrastructureAnalyzer.setLocalMaxMemory(oldmem);
    }
}
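The s1 and s2 strings concatenate DML statements without explicit separators, which makes them hard to read and easy to mistype. An equivalent, more legible construction of s1 with explicit newline separators (a readability sketch; the test itself uses the unseparated form shown above):

String s1 = "v = matrix(0, rows=nrow(X), cols=1)\n"
    + "parfor(i in 1:nrow(X), log=DEBUG) {\n"
    + "  v[i, ] = sum(X[i, ]);\n"
    + "}\n"
    + "r = sum(v);";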
Use of org.apache.sysml.api.mlcontext.MLResults in project incubator-systemml by apache.
From class FrameTest, method testFrameGeneral:
private void testFrameGeneral(InputInfo iinfo, OutputInfo oinfo, boolean bFromDataFrame, boolean bToDataFrame)
        throws IOException, DMLException, ParseException {
    boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    RUNTIME_PLATFORM oldRT = DMLScript.rtplatform;
    DMLScript.rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;
    int rowstart = 234, rowend = 1478, colstart = 125, colend = 568;
    int bRows = rowend - rowstart + 1, bCols = colend - colstart + 1;
    int rowstartC = 124, rowendC = 1178, colstartC = 143, colendC = 368;
    int cRows = rowendC - rowstartC + 1, cCols = colendC - colstartC + 1;
    HashMap<String, ValueType[]> outputSchema = new HashMap<String, ValueType[]>();
    HashMap<String, MatrixCharacteristics> outputMC = new HashMap<String, MatrixCharacteristics>();
    TestConfiguration config = getTestConfiguration(TEST_NAME);
    loadTestConfiguration(config);
    List<String> proArgs = new ArrayList<String>();
    proArgs.add(input("A"));
    proArgs.add(Integer.toString(rows));
    proArgs.add(Integer.toString(cols));
    proArgs.add(input("B"));
    proArgs.add(Integer.toString(bRows));
    proArgs.add(Integer.toString(bCols));
    proArgs.add(Integer.toString(rowstart));
    proArgs.add(Integer.toString(rowend));
    proArgs.add(Integer.toString(colstart));
    proArgs.add(Integer.toString(colend));
    proArgs.add(output("A"));
    proArgs.add(Integer.toString(rowstartC));
    proArgs.add(Integer.toString(rowendC));
    proArgs.add(Integer.toString(colstartC));
    proArgs.add(Integer.toString(colendC));
    proArgs.add(output("C"));
    fullDMLScriptName = SCRIPT_DIR + TEST_DIR + TEST_NAME + ".dml";
    ValueType[] schema = schemaMixedLarge;
    // initialize the frame data
    List<ValueType> lschema = Arrays.asList(schema);
    fullRScriptName = SCRIPT_DIR + TEST_DIR + TEST_NAME + ".R";
    rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " "
        + rowstart + " " + rowend + " " + colstart + " " + colend + " "
        + expectedDir() + " " + rowstartC + " " + rowendC + " " + colstartC + " " + colendC;
    double sparsity = sparsity1;
    double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 1111);
    writeInputFrameWithMTD("A", A, true, schema, oinfo);
    sparsity = sparsity2;
    double[][] B = getRandomMatrix(bRows, bCols, min, max, sparsity, 2345);
    ValueType[] schemaB = new ValueType[bCols];
    for (int i = 0; i < bCols; ++i)
        schemaB[i] = schema[colstart - 1 + i];
    List<ValueType> lschemaB = Arrays.asList(schemaB);
    writeInputFrameWithMTD("B", B, true, schemaB, oinfo);
    ValueType[] schemaC = new ValueType[colendC - colstartC + 1];
    for (int i = 0; i < cCols; ++i)
        schemaC[i] = schema[colstartC - 1 + i];
    Dataset<Row> dfA = null, dfB = null;
    if (bFromDataFrame) {
        // Create DataFrame for input A
        StructType dfSchemaA = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(schema, false);
        JavaRDD<Row> rowRDDA = FrameRDDConverterUtils.csvToRowRDD(sc, input("A"), DataExpression.DEFAULT_DELIM_DELIMITER, schema);
        dfA = spark.createDataFrame(rowRDDA, dfSchemaA);
        // Create DataFrame for input B
        StructType dfSchemaB = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(schemaB, false);
        JavaRDD<Row> rowRDDB = FrameRDDConverterUtils.csvToRowRDD(sc, input("B"), DataExpression.DEFAULT_DELIM_DELIMITER, schemaB);
        dfB = spark.createDataFrame(rowRDDB, dfSchemaB);
    }
    try {
        Script script = ScriptFactory.dmlFromFile(fullDMLScriptName);
        String format = "csv";
        if (oinfo == OutputInfo.TextCellOutputInfo)
            format = "text";
        if (bFromDataFrame) {
            script.in("A", dfA);
        } else {
            JavaRDD<String> aIn = sc.textFile(input("A"));
            FrameSchema fs = new FrameSchema(lschema);
            FrameFormat ff = format.equals("text") ? FrameFormat.IJV : FrameFormat.CSV;
            FrameMetadata fm = new FrameMetadata(ff, fs, rows, cols);
            script.in("A", aIn, fm);
        }
        if (bFromDataFrame) {
            script.in("B", dfB);
        } else {
            JavaRDD<String> bIn = sc.textFile(input("B"));
            FrameSchema fs = new FrameSchema(lschemaB);
            FrameFormat ff = format.equals("text") ? FrameFormat.IJV : FrameFormat.CSV;
            FrameMetadata fm = new FrameMetadata(ff, fs, bRows, bCols);
            script.in("B", bIn, fm);
        }
        // Output one frame to HDFS and get one as an RDD. TODO: HDFS input/output.
        script.out("A", "C");
        // set positional argument values
        for (int argNum = 1; argNum <= proArgs.size(); argNum++) {
            script.in("$" + argNum, proArgs.get(argNum - 1));
        }
        MLResults results = ml.execute(script);
        format = "csv";
        if (iinfo == InputInfo.TextCellInputInfo)
            format = "text";
        String fName = output("AB");
        try {
            MapReduceTool.deleteFileIfExistOnHDFS(fName);
        } catch (IOException e) {
            throw new DMLRuntimeException("Error while deleting file on HDFS", e);
        }
        if (!bToDataFrame) {
            if (format.equals("text")) {
                JavaRDD<String> javaRDDStringIJV = results.getJavaRDDStringIJV("A");
                javaRDDStringIJV.saveAsTextFile(fName);
            } else {
                JavaRDD<String> javaRDDStringCSV = results.getJavaRDDStringCSV("A");
                javaRDDStringCSV.saveAsTextFile(fName);
            }
        } else {
            Dataset<Row> df = results.getDataFrame("A");
            // Convert the DataFrame back to binary block for comparison
            // (original binary -> DataFrame -> binary round trip).
            MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, -1, -1, -1);
            JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils
                .dataFrameToBinaryBlock(sc, df, mc, bFromDataFrame)
                .mapToPair(new LongFrameToLongWritableFrameFunction());
            rddOut.saveAsHadoopFile(output("AB"), LongWritable.class, FrameBlock.class,
                OutputInfo.BinaryBlockOutputInfo.outputFormatClass);
        }
        fName = output("C");
        try {
            MapReduceTool.deleteFileIfExistOnHDFS(fName);
        } catch (IOException e) {
            throw new DMLRuntimeException("Error while deleting file on HDFS", e);
        }
        if (!bToDataFrame) {
            if (format.equals("text")) {
                JavaRDD<String> javaRDDStringIJV = results.getJavaRDDStringIJV("C");
                javaRDDStringIJV.saveAsTextFile(fName);
            } else {
                JavaRDD<String> javaRDDStringCSV = results.getJavaRDDStringCSV("C");
                javaRDDStringCSV.saveAsTextFile(fName);
            }
        } else {
            Dataset<Row> df = results.getDataFrame("C");
            // Convert the DataFrame back to binary block for comparison
            // (original binary -> DataFrame -> binary round trip).
            MatrixCharacteristics mc = new MatrixCharacteristics(cRows, cCols, -1, -1, -1);
            JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils
                .dataFrameToBinaryBlock(sc, df, mc, bFromDataFrame)
                .mapToPair(new LongFrameToLongWritableFrameFunction());
            rddOut.saveAsHadoopFile(fName, LongWritable.class, FrameBlock.class,
                OutputInfo.BinaryBlockOutputInfo.outputFormatClass);
        }
        runRScript(true);
        outputSchema.put("AB", schema);
        outputMC.put("AB", new MatrixCharacteristics(rows, cols, -1, -1));
        outputSchema.put("C", schemaC);
        outputMC.put("C", new MatrixCharacteristics(cRows, cCols, -1, -1));
        for (String file : config.getOutputFiles()) {
            MatrixCharacteristics md = outputMC.get(file);
            FrameBlock frameBlock = readDMLFrameFromHDFS(file, iinfo, md);
            FrameBlock frameRBlock = readRFrameFromHDFS(file + ".csv", InputInfo.CSVInputInfo, md);
            ValueType[] schemaOut = outputSchema.get(file);
            verifyFrameData(frameBlock, frameRBlock, schemaOut);
            System.out.println("File " + file + " processed successfully.");
        }
        System.out.println("Frame MLContext test completed successfully.");
    } finally {
        DMLScript.rtplatform = oldRT;
        DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
    }
}
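ScriptFactory offers both string-based and file-based entry points, which is why testFrameGeneral uses ScriptFactory.dmlFromFile while the earlier tests use the statically imported dml. A minimal usage sketch (the file path is illustrative):

import org.apache.sysml.api.mlcontext.Script;

import static org.apache.sysml.api.mlcontext.ScriptFactory.dml;
import static org.apache.sysml.api.mlcontext.ScriptFactory.dmlFromFile;

public class ScriptFactoryExample {
    public static void main(String[] args) {
        // Inline DML, as used by the earlier test methods:
        Script s1 = dml("R = sum(matrix(1, rows=3, cols=3));").out("R");
        // DML loaded from a file, as in testFrameGeneral:
        Script s2 = dmlFromFile("/path/to/script.dml");  // illustrative path
        System.out.println(s1);
        System.out.println(s2);
    }
}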