Search in sources:

Example 6 with FrameReader

use of org.apache.sysml.runtime.io.FrameReader in project incubator-systemml by apache.

the class FrameConverterTest method runConverterAndVerify.

/**
 * Writes a frame to HDFS, runs the converter under test on it, reads the
 * converter output back and verifies it against the original frame. Both
 * HDFS files are deleted afterwards, whether or not the run succeeded.
 *
 * @param schema value types used to build the frame; also handed to the converter
 * @param A      data passed to {@code initFrameData} to populate the frame
 * @param type   converter type passed to {@code runConverter}
 * @param iinfo  input info used to create the reader for the converter output
 * @param oinfo  output info used to create the writer for the converter input
 * @throws IOException declared by the signature; the cleanup calls in the
 *                     {@code finally} block are outside the catch
 */
private void runConverterAndVerify(ValueType[] schema, double[][] A, ConvType type, InputInfo iinfo, OutputInfo oinfo) throws IOException {
    try {
        // build the reference frame from the raw data
        final FrameBlock expected = new FrameBlock(schema);
        initFrameData(expected, A, schema);

        // persist the reference frame as converter input
        final FrameWriter frameWriter = FrameWriterFactory.createFrameWriter(oinfo);
        final String writePath = input("A");
        frameWriter.writeFrameToHDFS(expected, writePath, rows, schema.length);

        // execute the converter under test
        final MatrixCharacteristics dims = new MatrixCharacteristics(rows, schema.length, -1, -1, -1);
        final List<ValueType> schemaList = Arrays.asList(schema);
        final String convIn = input("A");
        final String convOut = output("B");
        runConverter(type, dims, null, schemaList, convIn, convOut);

        // load the converter output
        final FrameReader frameReader = FrameReaderFactory.createFrameReader(iinfo);
        final String readPath = output("B");
        final FrameBlock actual = frameReader.readFrameFromHDFS(readPath, rows, schema.length);

        // input and output frames must match
        verifyFrameData(expected, actual);
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    } finally {
        // always remove the temporary HDFS files
        MapReduceTool.deleteFileIfExistOnHDFS(input("A"));
        MapReduceTool.deleteFileIfExistOnHDFS(output("B"));
    }
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader) FrameWriter(org.apache.sysml.runtime.io.FrameWriter) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 7 with FrameReader

use of org.apache.sysml.runtime.io.FrameReader in project incubator-systemml by apache.

the class FrameMatrixCastingTest method readMatrixOrFrameInput.

/**
 * Reads a binary-block file from HDFS as either a frame or a matrix and
 * returns its contents as a dense double array.
 *
 * @param fname file name to read
 * @param rows  number of rows passed to the reader
 * @param cols  number of columns passed to the reader
 * @param dt    {@code DataType.FRAME} selects the frame reader; any other
 *              value selects the matrix reader
 * @return the data converted via {@code DataConverter.convertToDoubleMatrix}
 * @throws IOException declared by the signature for the read calls
 */
private static double[][] readMatrixOrFrameInput(String fname, int rows, int cols, DataType dt) throws IOException {
    final MatrixBlock data;
    if (dt != DataType.FRAME) {
        // matrix input: square blocks of the configured size
        final int blockSize = ConfigurationManager.getBlocksize();
        final MatrixReader matrixReader = MatrixReaderFactory.createMatrixReader(InputInfo.BinaryBlockInputInfo);
        data = matrixReader.readMatrixFromHDFS(fname, rows, cols, blockSize, blockSize, -1);
    } else {
        // frame input: read the frame, then convert it to a matrix block
        final FrameReader frameReader = FrameReaderFactory.createFrameReader(InputInfo.BinaryBlockInputInfo);
        final FrameBlock frame = frameReader.readFrameFromHDFS(fname, rows, cols);
        data = DataConverter.convertToMatrixBlock(frame);
    }
    return DataConverter.convertToDoubleMatrix(data);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader) MatrixReader(org.apache.sysml.runtime.io.MatrixReader)

Example 8 with FrameReader

use of org.apache.sysml.runtime.io.FrameReader in project incubator-systemml by apache.

the class FrameSchemaReadTest method runFrameSchemaReadTest.

/**
 * Writes a randomly generated frame as CSV, runs the DML script named by
 * {@code testname}, then reads the first block of the script output and
 * checks its schema column by column.
 *
 * @param testname name of the test configuration and DML script to run
 * @param schema   value types of the generated input frame
 * @param wildcard passed to {@code getSchemaString}; when {@code true}, an
 *                 all-{@code STRING} output schema is expected
 */
private void runFrameSchemaReadTest(String testname, ValueType[] schema, boolean wildcard) {
    try {
        TestConfiguration testConfig = getTestConfiguration(testname);
        loadTestConfiguration(testConfig);
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + testname + ".dml";
        programArgs = new String[] {
            "-explain",
            "-args",
            input("A"),
            getSchemaString(schema, wildcard),
            Integer.toString(rows),
            Integer.toString(schema.length),
            output("B")
        };

        // generate the input data
        double[][] data = getRandomMatrix(rows, schema.length, -10, 10, 0.9, 2373);

        // build the input frame
        FrameBlock inputFrame = new FrameBlock(schema);
        initFrameData(inputFrame, data, schema);

        // write the input frame to hdfs as CSV
        FrameWriter csvWriter = FrameWriterFactory.createFrameWriter(OutputInfo.CSVOutputInfo);
        csvWriter.writeFrameToHDFS(inputFrame, input("A"), rows, schema.length);

        // run the script
        runTest(true, false, null, -1);

        // read only the first output block (not via readers to test physical schema)
        FrameReader binaryReader = FrameReaderFactory.createFrameReader(InputInfo.BinaryBlockInputInfo);
        FrameBlock outputFrame = ((FrameReaderBinaryBlock) binaryReader).readFirstBlock(output("B"));

        // determine the expected output schema
        ValueType[] expected;
        if (testname.equals(TEST_NAME2) || wildcard) {
            expected = Collections.nCopies(schema.length, ValueType.STRING).toArray(new ValueType[0]);
        } else {
            expected = schema;
        }

        // compare the schema one column at a time
        for (int col = 0; col < expected.length; col++) {
            ValueType actual = outputFrame.getSchema()[col];
            Assert.assertEquals("Wrong result: " + actual + ".", expected[col], actual);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        throw new RuntimeException(ex);
    }
}
Also used : FrameReaderBinaryBlock(org.apache.sysml.runtime.io.FrameReaderBinaryBlock) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) ValueType(org.apache.sysml.parser.Expression.ValueType) TestConfiguration(org.apache.sysml.test.integration.TestConfiguration) FrameReader(org.apache.sysml.runtime.io.FrameReader) FrameWriter(org.apache.sysml.runtime.io.FrameWriter)

Example 9 with FrameReader

use of org.apache.sysml.runtime.io.FrameReader in project incubator-systemml by apache.

the class FrameCSVReadWriteTest method runCSVQuotesReadWriteTest.

/**
 * Runs the {@code TEST_NAME1} DML script on the given runtime platform, then
 * reads both the input dataset and the script output as CSV frames and
 * compares them cell by cell as strings.
 *
 * <p>The runtime platform and the local Spark config flag are overridden for
 * the duration of the run and restored afterwards.
 *
 * @param rt   runtime platform to run the script on
 * @param ofmt output format; only {@code "csv"} is accepted
 * @throws RuntimeException if {@code ofmt} is not {@code "csv"}, or wrapping
 *                          any exception raised while running or comparing
 */
private void runCSVQuotesReadWriteTest(RUNTIME_PLATFORM rt, String ofmt) {
    // Validate first: rejecting the format after the overrides below (and
    // outside the try/finally) would leave the global state modified.
    if (!"csv".equals(ofmt)) {
        throw new RuntimeException("Unsupported test output format");
    }

    // set runtime platform
    RUNTIME_PLATFORM rtold = rtplatform;
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    rtplatform = rt;
    if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }

    try {
        getAndLoadTestConfiguration(TEST_NAME1);
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-args", HOME + "input/" + DATASET, output("R") };
        runTest(true, false, null, -1);

        // read input/output and compare
        // NOTE(review): the input reader gets explicit CSV properties, the output reader the defaults — confirm this is intended
        FrameReader reader1 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(false, ",", false));
        FrameBlock fb1 = reader1.readFrameFromHDFS(HOME + "input/" + DATASET, -1L, -1L);
        FrameReader reader2 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo);
        FrameBlock fb2 = reader2.readFrameFromHDFS(output("R"), -1L, -1L);
        String[][] R1 = DataConverter.convertToStringFrame(fb1);
        String[][] R2 = DataConverter.convertToStringFrame(fb2);
        TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        // restore the global state overridden above
        rtplatform = rtold;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader)

Example 10 with FrameReader

use of org.apache.sysml.runtime.io.FrameReader in project incubator-systemml by apache.

the class TransformCSVFrameEncodeDecodeTest method runTransformTest.

/**
 * Runs the {@code TEST_NAME1} DML script on the given runtime platform, then
 * reads both the input dataset and the script output as CSV frames and
 * compares them cell by cell as strings. On {@code HYBRID_SPARK} it also
 * asserts that exactly two Spark instructions were executed.
 *
 * <p>The runtime platform and the local Spark config flag are overridden for
 * the duration of the run and restored afterwards.
 *
 * @param rt   runtime platform to run the script on
 * @param ofmt output format; only {@code "csv"} is accepted
 * @throws RuntimeException if {@code ofmt} is not {@code "csv"}, or wrapping
 *                          any exception raised while running or comparing
 */
private void runTransformTest(RUNTIME_PLATFORM rt, String ofmt) {
    // Validate first: rejecting the format after the overrides below (and
    // outside the try/finally) would leave the global state modified.
    if (!"csv".equals(ofmt)) {
        throw new RuntimeException("Unsupported test output format");
    }

    // set runtime platform
    RUNTIME_PLATFORM rtold = rtplatform;
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    rtplatform = rt;
    if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK) {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    }

    try {
        getAndLoadTestConfiguration(TEST_NAME1);
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-args", HOME + "input/" + DATASET, output("R") };
        runTest(true, false, null, -1);

        // read input/output and compare
        // NOTE(review): the input reader gets explicit CSV properties, the output reader the defaults — confirm this is intended
        FrameReader reader1 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(false, ",", false));
        FrameBlock fb1 = reader1.readFrameFromHDFS(HOME + "input/" + DATASET, -1L, -1L);
        FrameReader reader2 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo);
        FrameBlock fb2 = reader2.readFrameFromHDFS(output("R"), -1L, -1L);
        String[][] R1 = DataConverter.convertToStringFrame(fb1);
        String[][] R2 = DataConverter.convertToStringFrame(fb2);
        TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);

        if (rt == RUNTIME_PLATFORM.HYBRID_SPARK) {
            // Long.valueOf replaces the deprecated Long constructors; the comparison is unchanged
            Assert.assertEquals("Wrong number of executed Spark instructions: " + Statistics.getNoOfExecutedSPInst(), Long.valueOf(2), Long.valueOf(Statistics.getNoOfExecutedSPInst()));
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        // restore the global state overridden above
        rtplatform = rtold;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader)

Aggregations

FrameReader (org.apache.sysml.runtime.io.FrameReader): 21; FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 18; RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM): 8; CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties): 8; IOException (java.io.IOException): 7; DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 5; FrameWriter (org.apache.sysml.runtime.io.FrameWriter): 5; MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 5; MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 5; InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo): 3; ValueType (org.apache.sysml.parser.Expression.ValueType): 2; MatrixReader (org.apache.sysml.runtime.io.MatrixReader): 2; MatrixWriter (org.apache.sysml.runtime.io.MatrixWriter): 2; JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 1; DMLException (org.apache.sysml.api.DMLException): 1; LanguageException (org.apache.sysml.parser.LanguageException): 1; ParseException (org.apache.sysml.parser.ParseException): 1; FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject): 1; SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 1; RDDTransformApplyFunction (org.apache.sysml.runtime.instructions.spark.ParameterizedBuiltinSPInstruction.RDDTransformApplyFunction): 1