Use of org.apache.sysml.parser.Expression.ValueType in the incubator-systemml project by Apache.
In the class DataConverter, the method convertToMatrixBlock:
/**
 * Converts a frame block with arbitrary schema into a matrix block.
 * Since matrix block only supports value type double, we do a best
 * effort conversion of non-double types which might result in errors
 * for non-numerical data.
 *
 * @param frame frame block
 * @return matrix block
 */
public static MatrixBlock convertToMatrixBlock(FrameBlock frame) {
    int m = frame.getNumRows();
    int n = frame.getNumColumns();
    MatrixBlock mb = new MatrixBlock(m, n, false);
    mb.allocateDenseBlock();
    ValueType[] schema = frame.getSchema();
    int dFreq = UtilFunctions.frequency(schema, ValueType.DOUBLE);
    if (dFreq == schema.length) {
        // special case: all-double schema (no per-cell object creation,
        // cache-friendly blocked row-column copy of the column arrays)
        double[][] a = new double[n][];
        double[] c = mb.getDenseBlockValues();
        for (int j = 0; j < n; j++)
            a[j] = (double[]) frame.getColumnData(j);
        // process square tiles so that blocks of a+overhead/c fit into L1 cache
        final int blocksizeIJ = 16;
        for (int bi = 0; bi < m; bi += blocksizeIJ) {
            for (int bj = 0; bj < n; bj += blocksizeIJ) {
                int bimin = Math.min(bi + blocksizeIJ, m);
                int bjmin = Math.min(bj + blocksizeIJ, n);
                for (int i = bi, aix = bi * n; i < bimin; i++, aix += n)
                    for (int j = bj; j < bjmin; j++)
                        c[aix + j] = a[j][i];
            }
        }
    } else {
        // general case: per-cell schema-aware object-to-double conversion
        // (use pre-computed m/n instead of re-querying the frame each iteration)
        for (int i = 0; i < m; i++)
            for (int j = 0; j < n; j++)
                mb.appendValue(i, j, UtilFunctions.objectToDouble(schema[j], frame.get(i, j)));
    }
    // post-processing: switch to sparse representation if beneficial
    mb.examSparsity();
    return mb;
}
Use of org.apache.sysml.parser.Expression.ValueType in the incubator-systemml project by Apache.
In the class FrameCastingTest, the method runFrameCastingTest:
/**
 * Runs a frame casting test: generates schema-conform random data,
 * performs the requested cast (frame-to-matrix and/or matrix-to-frame),
 * and verifies meta data and all cell values of the result.
 *
 * @param schema value types of the frame columns
 * @param ctype cast operation under test (F2M, M2F_G, or M2F_S)
 */
private void runFrameCastingTest(ValueType[] schema, CastType ctype) {
try {
// data generation; round-trip each value through its schema's object
// representation so it is exactly representable in that value type
double[][] A = getRandomMatrix(rows, schema.length, -10, 10, 0.9, 2412);
for (int i = 0; i < rows; i++) {
for (int j = 0; j < schema.length; j++) A[i][j] = UtilFunctions.objectToDouble(schema[j], UtilFunctions.doubleToObject(schema[j], A[i][j]));
}
// core casting operations
FrameBlock frame = null;
if (ctype == CastType.F2M) {
// construct input frame with the given schema, row by row
FrameBlock frame1 = new FrameBlock(schema);
Object[] row1 = new Object[schema.length];
for (int i = 0; i < rows; i++) {
for (int j = 0; j < schema.length; j++) row1[j] = UtilFunctions.doubleToObject(schema[j], A[i][j]);
frame1.appendRow(row1);
}
// frame -> matrix -> frame round trip
MatrixBlock mb = DataConverter.convertToMatrixBlock(frame1);
frame = DataConverter.convertToFrameBlock(mb);
} else if (ctype == CastType.M2F_G) {
// matrix -> frame with generic (default) schema
MatrixBlock mb = DataConverter.convertToMatrixBlock(A);
frame = DataConverter.convertToFrameBlock(mb);
} else if (ctype == CastType.M2F_S) {
// matrix -> frame with the explicitly specified schema
MatrixBlock mb = DataConverter.convertToMatrixBlock(A);
frame = DataConverter.convertToFrameBlock(mb, schema);
}
// check basic meta data
if (frame.getNumRows() != rows)
Assert.fail("Wrong number of rows: " + frame.getNumRows() + ", expected: " + rows);
// check correct values (compare via double conversion of each cell)
ValueType[] lschema = frame.getSchema();
for (int i = 0; i < rows; i++) for (int j = 0; j < lschema.length; j++) {
double tmp = UtilFunctions.objectToDouble(lschema[j], frame.get(i, j));
if (tmp != A[i][j])
Assert.fail("Wrong get value for cell (" + i + "," + j + "): " + tmp + ", expected: " + A[i][j]);
}
} catch (Exception ex) {
ex.printStackTrace();
throw new RuntimeException(ex);
}
}
Use of org.apache.sysml.parser.Expression.ValueType in the incubator-systemml project by Apache.
In the class FrameIndexingDistTest, the method runTestLeftIndexing:
/**
 * Runs a distributed frame left- or right-indexing test end-to-end:
 * writes random input frames to HDFS, executes the DML test script and
 * the equivalent R script, then compares the resulting frames against
 * the R reference output.
 *
 * @param et execution type (SPARK, or default hybrid runtime)
 * @param indexingMethod forced left-indexing method, or null for the default
 * @param schema value types of the frame columns of input A
 * @param itype indexing type: LIX runs left indexing, otherwise right indexing
 * @param bSparse if true, use sparsity4 instead of sparsity2 for input A
 *        (only used on the right-indexing path)
 * @throws IOException if writing or reading frame data fails
 */
private void runTestLeftIndexing(ExecType et, LeftIndexingOp.LeftIndexingMethod indexingMethod, ValueType[] schema, IXType itype, boolean bSparse) throws IOException {
// save global runtime configuration so it can be restored in the finally block
boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
RUNTIME_PLATFORM oldRTP = rtplatform;
TestConfiguration config = null;
// expected output schema per output file, for verification against R
HashMap<String, ValueType[]> outputSchema = new HashMap<String, ValueType[]>();
if (itype == IXType.LIX)
config = getTestConfiguration("FrameLeftIndexing");
else
config = getTestConfiguration("FrameRightIndexing");
try {
if (indexingMethod != null) {
LeftIndexingOp.FORCED_LEFT_INDEXING = indexingMethod;
}
if (et == ExecType.SPARK) {
rtplatform = RUNTIME_PLATFORM.SPARK;
} else {
// rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE;
rtplatform = RUNTIME_PLATFORM.HYBRID;
}
if (rtplatform == RUNTIME_PLATFORM.SPARK)
DMLScript.USE_LOCAL_SPARK_CONFIG = true;
config.addVariable("rows", rows);
config.addVariable("cols", cols);
// fixed indexing range used by both the DML and R scripts
long rowstart = 816, rowend = 1229, colstart = 109, /*967*/
colend = 1009;
config.addVariable("rowstart", rowstart);
config.addVariable("rowend", rowend);
config.addVariable("colstart", colstart);
config.addVariable("colend", colend);
loadTestConfiguration(config);
if (itype == IXType.LIX) {
/* This is for running the junit test the new way, i.e., construct the arguments directly */
String LI_HOME = SCRIPT_DIR + TEST_DIR;
fullDMLScriptName = LI_HOME + TEST_NAME + ".dml";
programArgs = new String[] { "-args", input("A"), Long.toString(rows), Long.toString(cols), Long.toString(rowstart), Long.toString(rowend), Long.toString(colstart), Long.toString(colend), output("AB"), output("AC"), output("AD"), input("B"), input("C"), input("D"), Long.toString(rowend - rowstart + 1), Long.toString(colend - colstart + 1), Long.toString(cols - colstart + 1) };
fullRScriptName = LI_HOME + TEST_NAME + ".R";
rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + rowstart + " " + rowend + " " + colstart + " " + colend + " " + expectedDir();
// initialize the frame data.
// rand.nextDouble();
double sparsity = sparsity1;
double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 1111);
writeInputFrameWithMTD("A", A, true, schema, OutputInfo.BinaryBlockOutputInfo);
// rand.nextDouble();
sparsity = sparsity3;
// B: left-hand-side block matching the indexed range of A
double[][] B = getRandomMatrix((int) (rowend - rowstart + 1), (int) (colend - colstart + 1), min, max, sparsity, 2345);
ValueType[] lschemaB = Arrays.copyOfRange(schema, (int) colstart - 1, (int) colend);
writeInputFrameWithMTD("B", B, true, lschemaB, OutputInfo.BinaryBlockOutputInfo);
// rand.nextDouble();
sparsity = sparsity2;
// C: block spanning rows 1..rowend and columns colstart..cols
double[][] C = getRandomMatrix((int) (rowend), (int) (cols - colstart + 1), min, max, sparsity, 3267);
ValueType[] lschemaC = Arrays.copyOfRange(schema, (int) colstart - 1, (int) cols);
writeInputFrameWithMTD("C", C, true, lschemaC, OutputInfo.BinaryBlockOutputInfo);
// rand.nextDouble();
sparsity = sparsity4;
// D: full-height block over columns colstart..colend (reuses schema of B)
double[][] D = getRandomMatrix(rows, (int) (colend - colstart + 1), min, max, sparsity, 4856);
writeInputFrameWithMTD("D", D, true, lschemaB, OutputInfo.BinaryBlockOutputInfo);
boolean exceptionExpected = false;
int expectedNumberOfJobs = -1;
runTest(true, exceptionExpected, null, expectedNumberOfJobs);
// all left-indexing outputs keep the full input schema
for (String file : config.getOutputFiles()) outputSchema.put(file, schema);
} else {
/* This is for running the junit test the new way, i.e., construct the arguments directly */
String RI_HOME = SCRIPT_DIR + TEST_DIR;
fullDMLScriptName = RI_HOME + RTEST_NAME + ".dml";
programArgs = new String[] { "-stats", "-explain", "-args", input("A"), Long.toString(rows), Long.toString(cols), Long.toString(rowstart), Long.toString(rowend), Long.toString(colstart), Long.toString(colend), output("B"), output("C"), output("D") };
fullRScriptName = RI_HOME + RTEST_NAME + ".R";
rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + rowstart + " " + rowend + " " + colstart + " " + colend + " " + expectedDir();
// initialize the frame data.
double sparsity = bSparse ? sparsity4 : sparsity2;
double[][] A = getRandomMatrix(rows, cols, min, max, sparsity, 1111);
writeInputFrameWithMTD("A", A, true, schema, OutputInfo.BinaryBlockOutputInfo);
// right-indexing outputs carry sub-ranges of the input schema
ValueType[] schemaB = new ValueType[(int) (colend - colstart + 1)];
System.arraycopy(schema, (int) (colstart - 1), schemaB, 0, (int) (colend - colstart + 1));
outputSchema.put(config.getOutputFiles()[0], schemaB);
ValueType[] schemaC = new ValueType[(int) (cols - colstart + 1)];
System.arraycopy(schema, (int) (colstart - 1), schemaC, 0, (int) (cols - colstart + 1));
outputSchema.put(config.getOutputFiles()[1], schemaC);
outputSchema.put(config.getOutputFiles()[2], schemaB);
boolean exceptionExpected = false;
int expectedNumberOfJobs = -1;
runTest(true, exceptionExpected, null, expectedNumberOfJobs);
}
} catch (Exception ex) {
ex.printStackTrace();
throw new RuntimeException(ex);
} finally {
// restore global runtime configuration regardless of test outcome
rtplatform = oldRTP;
DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
LeftIndexingOp.FORCED_LEFT_INDEXING = null;
}
// run the R reference script and compare each output file against it
runRScript(true);
for (String file : config.getOutputFiles()) {
FrameBlock frameBlock = readDMLFrameFromHDFS(file, InputInfo.BinaryBlockInputInfo);
MatrixCharacteristics md = new MatrixCharacteristics(frameBlock.getNumRows(), frameBlock.getNumColumns(), -1, -1);
FrameBlock frameRBlock = readRFrameFromHDFS(file + ".csv", InputInfo.CSVInputInfo, md);
ValueType[] schemaOut = outputSchema.get(file);
verifyFrameData(frameBlock, frameRBlock, schemaOut);
System.out.println("File processed is " + file);
}
}
Use of org.apache.sysml.parser.Expression.ValueType in the incubator-systemml project by Apache.
In the class FrameIndexingTest, the method runFrameIndexingTest:
/**
 * Runs a frame indexing test: performs the same right-indexing (slice)
 * or left-indexing operation on a frame block and on an equivalent
 * matrix block, and verifies that both results match cell by cell.
 *
 * @param schema value types of the frame columns
 * @param itype indexing type (RIX for right/slice, LIX for left indexing)
 */
private void runFrameIndexingTest(ValueType[] schema, IXType itype) {
try {
// data generation (round-trip through object representation so values
// are exactly representable in the given value types)
double[][] A = getRandomMatrix(rows, schema.length, -10, 10, 0.9, 2412);
// init data frame 1
FrameBlock frame1 = new FrameBlock(schema);
Object[] row1 = new Object[schema.length];
for (int i = 0; i < rows; i++) {
for (int j = 0; j < schema.length; j++) A[i][j] = UtilFunctions.objectToDouble(schema[j], row1[j] = UtilFunctions.doubleToObject(schema[j], A[i][j]));
frame1.appendRow(row1);
}
// core indexing operation
MatrixBlock mbC = null;
FrameBlock frame3 = null;
if (itype == IXType.RIX) {
// matrix indexing
MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
mbC = mbA.slice(rl, ru, cl, cu, new MatrixBlock());
// frame indexing
frame3 = frame1.slice(rl, ru, cl, cu, new FrameBlock());
} else if (itype == IXType.LIX) {
// data generation for the left-hand-side block
double[][] B = getRandomMatrix(ru - rl + 1, cu - cl + 1, -10, 10, 0.9, 7);
// init data frame 2 with the column sub-range schema [cl, cu]
ValueType[] lschema2 = new ValueType[cu - cl + 1];
for (int j = cl; j <= cu; j++) lschema2[j - cl] = schema[j];
FrameBlock frame2 = new FrameBlock(lschema2);
Object[] row2 = new Object[lschema2.length];
for (int i = 0; i < ru - rl + 1; i++) {
for (int j = 0; j < lschema2.length; j++) B[i][j] = UtilFunctions.objectToDouble(lschema2[j], row2[j] = UtilFunctions.doubleToObject(lschema2[j], B[i][j]));
frame2.appendRow(row2);
}
// matrix indexing
MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
MatrixBlock mbB = DataConverter.convertToMatrixBlock(B);
mbC = mbA.leftIndexingOperations(mbB, rl, ru, cl, cu, new MatrixBlock(), UpdateType.COPY);
// frame indexing
frame3 = frame1.leftIndexingOperations(frame2, rl, ru, cl, cu, new FrameBlock());
}
// check basic meta data
if (frame3.getNumRows() != mbC.getNumRows())
Assert.fail("Wrong number of rows: " + frame3.getNumRows() + ", expected: " + mbC.getNumRows());
// check correct values: frame result must equal matrix result cell-wise
ValueType[] lschema = frame3.getSchema();
for (int i = 0; i < ru - rl + 1; i++) for (int j = 0; j < lschema.length; j++) {
double tmp = UtilFunctions.objectToDouble(lschema[j], frame3.get(i, j));
if (tmp != mbC.quickGetValue(i, j))
Assert.fail("Wrong get value for cell (" + i + "," + j + "): " + tmp + ", expected: " + mbC.quickGetValue(i, j));
}
} catch (Exception ex) {
ex.printStackTrace();
throw new RuntimeException(ex);
}
}
Use of org.apache.sysml.parser.Expression.ValueType in the incubator-systemml project by Apache.
In the class FrameMatrixCastingTest, the method runFrameCastingTest:
/**
* @param testname
* @param schema
* @param wildcard
*/
/**
 * Runs a frame/matrix casting test end-to-end via a DML script:
 * writes the input as frame or matrix depending on the test name,
 * executes the cast, and compares the output against the input data.
 *
 * @param testname name of the test script (determines cast direction)
 * @param multColBlks if true, use a column count spanning multiple column blocks
 * @param vt value type used when the input/output is a frame
 * @param et execution type (MR, SPARK, or default hybrid runtime)
 */
private void runFrameCastingTest(String testname, boolean multColBlks, ValueType vt, ExecType et) {
// save and switch global runtime platform; restored in the finally block
RUNTIME_PLATFORM platformOld = rtplatform;
switch(et) {
case MR:
rtplatform = RUNTIME_PLATFORM.HADOOP;
break;
case SPARK:
rtplatform = RUNTIME_PLATFORM.SPARK;
break;
default:
rtplatform = RUNTIME_PLATFORM.HYBRID;
break;
}
boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
if (rtplatform == RUNTIME_PLATFORM.SPARK)
DMLScript.USE_LOCAL_SPARK_CONFIG = true;
try {
int cols = multColBlks ? cols2 : cols1;
TestConfiguration config = getTestConfiguration(testname);
loadTestConfiguration(config);
String HOME = SCRIPT_DIR + TEST_DIR;
fullDMLScriptName = HOME + testname + ".dml";
programArgs = new String[] { "-explain", "-args", input("A"), output("B") };
// data generation; TEST_NAME1 casts frame->matrix, so its input is a frame
double[][] A = getRandomMatrix(rows, cols, -1, 1, 0.9, 7);
DataType dtin = testname.equals(TEST_NAME1) ? DataType.FRAME : DataType.MATRIX;
ValueType vtin = testname.equals(TEST_NAME1) ? vt : ValueType.DOUBLE;
writeMatrixOrFrameInput(input("A"), A, rows, cols, dtin, vtin);
// run testcase
runTest(true, false, null, -1);
// compare matrices: output data type is the opposite of the input's
DataType dtout = testname.equals(TEST_NAME1) ? DataType.MATRIX : DataType.FRAME;
double[][] B = readMatrixOrFrameInput(output("B"), rows, cols, dtout);
TestUtils.compareMatrices(A, B, rows, cols, 0);
} catch (Exception ex) {
throw new RuntimeException(ex);
} finally {
// restore global runtime configuration regardless of test outcome
rtplatform = platformOld;
DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
}
}
Aggregations