Search in sources :

Example 61 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameMetaReadWriteTest method runFrameReadWriteTest.

/**
 * Writes a randomly generated string frame with per-column metadata, runs the
 * test script on the platform selected by {@code et}, and asserts that the MV
 * value and distinct count read back from output "B" equal the ones written
 * to input "A".
 *
 * @param oinfo output info used to write the input frame, passed to the script
 *              in its external string form, and used to select the matching reader
 * @param et    execution type; MR selects HADOOP, SPARK selects SPARK, and any
 *              other value selects HYBRID
 */
private void runFrameReadWriteTest(OutputInfo oinfo, ExecType et) {
    // remember the global settings so that the finally block can put them back
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    switch(et) {
        case MR:
            rtplatform = RUNTIME_PLATFORM.HADOOP;
            break;
        case SPARK:
            rtplatform = RUNTIME_PLATFORM.SPARK;
            break;
        default:
            rtplatform = RUNTIME_PLATFORM.HYBRID;
            break;
    }
    final boolean savedSparkConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (RUNTIME_PLATFORM.SPARK == rtplatform) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }
    final String formatName = OutputInfo.outputInfoToStringExternal(oinfo);
    try {
        loadTestConfiguration(getTestConfiguration(TEST_NAME));
        fullDMLScriptName = SCRIPT_DIR + TEST_DIR + TEST_NAME + ".dml";
        programArgs = new String[] {
            "-explain",
            "-args",
            input("A"),
            String.valueOf(rows),
            String.valueOf(cols),
            formatName,
            output("B")
        };

        // generate the input frame and attach distinguishable metadata to every column
        double[][] values = getRandomMatrix(rows, cols, -10, 10, 0.7, 3412);
        FrameBlock written = DataConverter.convertToFrameBlock(DataConverter.convertToMatrixBlock(values), ValueType.STRING);
        for (int col = 0; col < cols; col++) {
            int ordinal = col + 1;
            written.getColumnMetadata(col).setMvValue(String.valueOf(ordinal));
            written.getColumnMetadata(col).setNumDistinct(ordinal);
        }
        FrameWriterFactory.createFrameWriter(oinfo).writeFrameToHDFS(written, input("A"), rows, cols);

        // execute the script
        runTest(true, false, null, -1);

        // load the result and check the metadata column by column
        FrameBlock restored = FrameReaderFactory.createFrameReader(OutputInfo.getMatchingInputInfo(oinfo)).readFrameFromHDFS(output("B"), rows, cols);
        for (int col = 0; col < cols; col++) {
            Assert.assertEquals("MV meta data wrong!", written.getColumnMetadata(col).getMvValue(), restored.getColumnMetadata(col).getMvValue());
            Assert.assertEquals("Distinct meta data wrong!", written.getColumnMetadata(col).getNumDistinct(), restored.getColumnMetadata(col).getNumDistinct());
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    } finally {
        rtplatform = savedPlatform;
        DMLScript.USE_LOCAL_SPARK_CONFIG = savedSparkConfig;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) TestConfiguration(org.apache.sysml.test.integration.TestConfiguration)

Example 62 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameReadWriteTest method writeAndVerifyData.

/**
 * Writes both frames to temporary files, reads them back with the matching
 * reader, and passes each original/read-back pair to {@code verifyFrameData}.
 *
 * <p>The temporary files are removed in a {@code finally} block, so they do not
 * stay behind when writing, reading, or verification fails.
 *
 * @param oinfo  output info selecting the writer and, via its matching input info, the reader
 * @param frame1 first frame to round-trip
 * @param frame2 second frame to round-trip
 * @param fprop  file format properties handed to both the writer and the reader factory
 * @throws IOException if writing, reading, or deleting a file fails
 */
void writeAndVerifyData(OutputInfo oinfo, FrameBlock frame1, FrameBlock frame2, CSVFileFormatProperties fprop) throws IOException {
    String fname1 = SCRIPT_DIR + TEST_DIR + "/frameData1";
    String fname2 = SCRIPT_DIR + TEST_DIR + "/frameData2";
    try {
        // Create reader/writer
        FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo, fprop);
        FrameReader reader = FrameReaderFactory.createFrameReader(OutputInfo.getMatchingInputInfo(oinfo), fprop);
        // Write frame data to disk
        writer.writeFrameToHDFS(frame1, fname1, frame1.getNumRows(), frame1.getNumColumns());
        writer.writeFrameToHDFS(frame2, fname2, frame2.getNumRows(), frame2.getNumColumns());
        // Read frame data from disk
        FrameBlock frame1Read = reader.readFrameFromHDFS(fname1, frame1.getSchema(), frame1.getNumRows(), frame1.getNumColumns());
        FrameBlock frame2Read = reader.readFrameFromHDFS(fname2, frame2.getSchema(), frame2.getNumRows(), frame2.getNumColumns());
        // Verify the frames read back against the original frames
        verifyFrameData(frame1, frame1Read);
        verifyFrameData(frame2, frame2Read);
    } finally {
        // Always clean up: a failed write, read, or verification must not leave files behind
        MapReduceTool.deleteFileIfExistOnHDFS(fname1);
        MapReduceTool.deleteFileIfExistOnHDFS(fname2);
    }
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader) FrameWriter(org.apache.sysml.runtime.io.FrameWriter)

Example 63 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class TransformFrameEncodeColmapTest method runTransformTest.

/**
 * Runs the DML script named after {@code testname} on the given runtime platform,
 * then reads the CSV input dataset and the script output as frames and compares
 * their string representations with {@code TestUtils.compareFrames}.
 *
 * <p>The output format is validated before any global setting is modified, so an
 * unsupported format cannot leave {@code rtplatform} or
 * {@code DMLScript.USE_LOCAL_SPARK_CONFIG} changed for later tests.
 *
 * @param testname name of the test configuration and of the {@code .dml} script
 * @param rt       runtime platform to run on; SPARK and HYBRID_SPARK enable the local Spark config
 * @param ofmt     output format; only {@code "csv"} is accepted
 * @param colnames selects {@code SPEC1b} when true and {@code SPEC1} otherwise
 * @throws RuntimeException if {@code ofmt} is not {@code "csv"}, or wrapping any
 *                          exception raised while running or comparing
 */
private void runTransformTest(String testname, RUNTIME_PLATFORM rt, String ofmt, boolean colnames) {
    // validate first: throwing after the global settings were changed would skip their restore
    if (!ofmt.equals("csv"))
        throw new RuntimeException("Unsupported test output format");
    // set runtime platform
    RUNTIME_PLATFORM rtold = rtplatform;
    rtplatform = rt;
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    // set transform specification
    String DATASET = DATASET1;
    String SPEC = colnames ? SPEC1b : SPEC1;
    try {
        getAndLoadTestConfiguration(testname);
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + testname + ".dml";
        programArgs = new String[] { "-explain", "-nvargs", "DATA=" + HOME + "input/" + DATASET, "TFSPEC=" + HOME + "input/" + SPEC, "TFDATA=" + output("tfout"), "OFMT=" + ofmt, "OSEP=," };
        runTest(true, false, null, -1);
        // read input/output and compare
        FrameReader reader1 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(true, ",", false));
        FrameBlock fb1 = reader1.readFrameFromHDFS(HOME + "input/" + DATASET, -1L, -1L);
        FrameReader reader2 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo);
        FrameBlock fb2 = reader2.readFrameFromHDFS(output("tfout"), -1L, -1L);
        String[][] R1 = DataConverter.convertToStringFrame(fb1);
        String[][] R2 = DataConverter.convertToStringFrame(fb2);
        TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        // restore the global settings for subsequent tests
        rtplatform = rtold;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader)

Example 64 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class TransformFrameEncodeDecodeTokenTest method runTransformTest.

/**
 * Runs the {@code TEST_NAME1} DML script on the given runtime platform, then reads
 * the space-separated input dataset and the script output as frames and compares
 * their string representations with {@code TestUtils.compareFrames}. On
 * HYBRID_SPARK it additionally asserts that exactly two Spark instructions were
 * executed.
 *
 * <p>The output format is validated before any global setting is modified, so an
 * unsupported format cannot leave {@code rtplatform} or
 * {@code DMLScript.USE_LOCAL_SPARK_CONFIG} changed for later tests.
 *
 * @param rt   runtime platform to run on; SPARK and HYBRID_SPARK enable the local Spark config
 * @param ofmt output format; only {@code "csv"} is accepted
 * @throws RuntimeException if {@code ofmt} is not {@code "csv"}, or wrapping any
 *                          exception raised while running or comparing
 */
private void runTransformTest(RUNTIME_PLATFORM rt, String ofmt) {
    // validate first: throwing after the global settings were changed would skip their restore
    if (!ofmt.equals("csv"))
        throw new RuntimeException("Unsupported test output format");
    // set runtime platform
    RUNTIME_PLATFORM rtold = rtplatform;
    rtplatform = rt;
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    try {
        getAndLoadTestConfiguration(TEST_NAME1);
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-nvargs", "DATA=" + HOME + "input/" + DATASET1, "TFSPEC=" + HOME + "input/" + SPEC1, "TFDATA=" + output("tfout"), "SEP= ", "OFMT=" + ofmt, "OSEP= " };
        runTest(true, false, null, -1);
        // read input/output and compare
        FrameReader reader1 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(false, " ", false));
        FrameBlock fb1 = reader1.readFrameFromHDFS(HOME + "input/" + DATASET1, -1L, -1L);
        FrameReader reader2 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(false, " ", false));
        FrameBlock fb2 = reader2.readFrameFromHDFS(output("tfout"), -1L, -1L);
        String[][] R1 = DataConverter.convertToStringFrame(fb1);
        String[][] R2 = DataConverter.convertToStringFrame(fb2);
        TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);
        if (rt == RUNTIME_PLATFORM.HYBRID_SPARK) {
            // Long.valueOf replaces the deprecated Long constructor; the compared values are unchanged
            Assert.assertEquals("Wrong number of executed Spark instructions: " + Statistics.getNoOfExecutedSPInst(), Long.valueOf(2L), Long.valueOf(Statistics.getNoOfExecutedSPInst()));
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        // restore the global settings for subsequent tests
        rtplatform = rtold;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader)

Example 65 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameObject method readBlobFromHDFS.

/**
 * Reads the frame stored under {@code fname} using the input info and dimensions
 * held in this object's metadata.
 *
 * <p>When no schema is set, an all-STRING schema is used whose length is
 * {@code clen}, or 1 if {@code clen} is smaller than 1.
 *
 * @param fname file name to read from
 * @param rlen  not used by this implementation
 * @param clen  column count, used only to size the fallback schema
 * @return the frame block that was read, never {@code null}
 * @throws IOException if the reader raises a {@code DMLRuntimeException} (wrapped as
 *                     the cause) or returns {@code null}
 */
@Override
protected FrameBlock readBlobFromHDFS(String fname, long rlen, long clen) throws IOException {
    final MetaDataFormat format = (MetaDataFormat) _metaData;
    final MatrixCharacteristics dims = format.getMatrixCharacteristics();

    // fall back to an all-string schema when none has been set
    final ValueType[] schema;
    if (_schema == null) {
        int numCols = (clen < 1) ? 1 : (int) clen;
        schema = UtilFunctions.nCopies(numCols, ValueType.STRING);
    } else {
        schema = _schema;
    }

    // read the frame, surfacing runtime failures as IOException
    FrameBlock block = null;
    try {
        block = FrameReaderFactory.createFrameReader(format.getInputInfo(), getFileFormatProperties())
            .readFrameFromHDFS(fname, schema, dims.getRows(), dims.getCols());
    } catch (DMLRuntimeException ex) {
        throw new IOException(ex);
    }

    // a null result means the frame could not be loaded
    if (block != null) {
        return block;
    }
    throw new IOException("Unable to load frame from file: " + fname);
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 90, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 28, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 26, ValueType (org.apache.sysml.parser.Expression.ValueType): 23, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 23, FrameReader (org.apache.sysml.runtime.io.FrameReader): 18, IOException (java.io.IOException): 16, RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM): 16, FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject): 15, LongWritable (org.apache.hadoop.io.LongWritable): 12, JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 11, CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties): 11, FrameWriter (org.apache.sysml.runtime.io.FrameWriter): 9, TestConfiguration (org.apache.sysml.test.integration.TestConfiguration): 8, Text (org.apache.hadoop.io.Text): 7, SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 7, RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject): 7, ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair): 6, CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction): 5, MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat): 5