Search in sources :

Example 11 with FrameReader

use of org.apache.sysml.runtime.io.FrameReader in project incubator-systemml by apache.

the class TransformCSVFrameEncodeReadTest method runTransformTest.

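/**
 * Runs the CSV frame encode/read transform script on the given runtime platform
 * and reads the written result back with a CSV frame reader.
 *
 * @param rt runtime platform to execute the test on
 * @param ofmt output format; only "csv" is accepted
 * @param subset if true, 4 rows are passed to the script as row count, otherwise 13
 * @param parRead if true, the result is read with the parallel CSV frame reader,
 *                otherwise with the sequential one
 */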
private void runTransformTest(RUNTIME_PLATFORM rt, String ofmt, boolean subset, boolean parRead) {
    // Validate the arguments BEFORE overriding any global test state: this throw is
    // not covered by the try/finally below, so failing after the overrides would
    // leave rtplatform / USE_LOCAL_SPARK_CONFIG modified for subsequent tests.
    // The constant-first equals reports a null format as unsupported instead of an NPE.
    if (!"csv".equals(ofmt))
        throw new RuntimeException("Unsupported test output format");
    // remember the current global settings; they are restored in the finally block
    RUNTIME_PLATFORM rtold = rtplatform;
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    try {
        // set runtime platform
        rtplatform = rt;
        if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
            DMLScript.USE_LOCAL_SPARK_CONFIG = true;
        getAndLoadTestConfiguration(TEST_NAME1);
        String HOME = SCRIPT_DIR + TEST_DIR;
        // row count handed to the script as its second argument
        int nrows = subset ? 4 : 13;
        fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-stats", "-args", HOME + "input/" + DATASET, String.valueOf(nrows), output("R") };
        runTest(true, false, null, -1);
        // read the script output back, with the parallel or the sequential CSV reader
        FrameReader reader2 = parRead ? new FrameReaderTextCSVParallel(new CSVFileFormatProperties()) : new FrameReaderTextCSV(new CSVFileFormatProperties());
        FrameBlock fb2 = reader2.readFrameFromHDFS(output("R"), -1L, -1L);
        System.out.println(DataConverter.toString(fb2));
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        rtplatform = rtold;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) FrameReaderTextCSVParallel(org.apache.sysml.runtime.io.FrameReaderTextCSVParallel) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) FrameReaderTextCSV(org.apache.sysml.runtime.io.FrameReaderTextCSV) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader)

Example 12 with FrameReader

use of org.apache.sysml.runtime.io.FrameReader in project incubator-systemml by apache.

the class TransformEncodeDecodeTest method runTransformEncodeDecodeTest.

/**
 * Writes a randomly generated frame, runs the encode/decode DML script on it,
 * and checks every output row against the map obtained from
 * {@code getCounts(FA, 1)} on the input frame.
 *
 * @param et execution type (not referenced inside this method)
 * @param sparse if true, the input is generated with {@code sparsity2}, otherwise with {@code sparsity1}
 * @param fmt external format name used to write the input and read the output
 */
private void runTransformEncodeDecodeTest(ExecType et, boolean sparse, String fmt) {
    // the test always runs under HYBRID; the previous platform is restored in finally
    final RUNTIME_PLATFORM savedPlatform = rtplatform;
    rtplatform = RUNTIME_PLATFORM.HYBRID;
    try {
        getAndLoadTestConfiguration(TEST_NAME1);

        // resolve matching input/output format descriptors
        final InputInfo inInfo = InputInfo.stringExternalToInputInfo(fmt);
        final OutputInfo outInfo = InputInfo.getMatchingOutputInfo(inInfo);

        // generate rounded random data and write it as the input frame "F"
        double[][] data = TestUtils.round(getRandomMatrix(rows, cols, 1, 15, sparse ? sparsity2 : sparsity1, 7));
        FrameBlock inputFrame = DataConverter.convertToFrameBlock(DataConverter.convertToMatrixBlock(data));
        FrameWriterFactory.createFrameWriter(outInfo).writeFrameToHDFS(inputFrame, input("F"), rows, cols);

        // configure and run the script
        final String scriptBase = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = scriptBase + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-args", input("F"), fmt, String.valueOf(rows), String.valueOf(cols), scriptBase + SPEC, output("FO") };
        runTest(true, false, null, -1);

        // read the output frame and compare each row with the expected value
        FrameBlock outputFrame = FrameReaderFactory.createFrameReader(inInfo).readFrameFromHDFS(output("FO"), 16, 2);
        HashMap<String, Long> expectedByKey = getCounts(inputFrame, 1);
        for (Iterator<String[]> rowIter = outputFrame.getStringRowIterator(); rowIter.hasNext(); ) {
            String[] fields = rowIter.next();
            // column 1 holds the lookup key, column 0 the value (null is treated as 0)
            Double expected = Double.valueOf(expectedByKey.get(fields[1]).doubleValue());
            Double val;
            if (fields[0] != null)
                val = Double.valueOf(fields[0]);
            else
                val = Double.valueOf(0d);
            Assert.assertEquals("Output aggregates don't match: " + expected + " vs " + val, expected, val);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    } finally {
        rtplatform = savedPlatform;
    }
}
Also used : FrameWriter(org.apache.sysml.runtime.io.FrameWriter) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader)

Example 13 with FrameReader

use of org.apache.sysml.runtime.io.FrameReader in project incubator-systemml by apache.

the class TransformFrameEncodeDecodeTest method runTransformTest.

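/**
 * Runs the frame encode/decode transform script on the given runtime platform
 * and compares the frame written by the script with the input frame.
 *
 * @param rt runtime platform to execute the test on
 * @param ofmt output format; only "csv" is accepted
 * @param type transform type; only RECODE and DUMMY are supported
 * @param colnames if true, the "b" variant of the specification (SPEC1b / SPEC2b) is used
 */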
private void runTransformTest(RUNTIME_PLATFORM rt, String ofmt, TransformType type, boolean colnames) {
    // Resolve the transform specification and validate the output format BEFORE
    // overriding any global test state: these throws are not covered by the
    // try/finally below, so failing after the overrides would leave
    // rtplatform / USE_LOCAL_SPARK_CONFIG modified for subsequent tests.
    String SPEC = null;
    String DATASET = null;
    switch(type) {
        case RECODE:
            SPEC = colnames ? SPEC1b : SPEC1;
            DATASET = DATASET1;
            break;
        case DUMMY:
            SPEC = colnames ? SPEC2b : SPEC2;
            DATASET = DATASET1;
            break;
        default:
            throw new RuntimeException("Unsupported transform type for encode/decode test.");
    }
    // constant-first equals reports a null format as unsupported instead of an NPE
    if (!"csv".equals(ofmt))
        throw new RuntimeException("Unsupported test output format");
    // remember the current global settings; they are restored in the finally block
    RUNTIME_PLATFORM rtold = rtplatform;
    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    try {
        // set runtime platform
        rtplatform = rt;
        if (rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
            DMLScript.USE_LOCAL_SPARK_CONFIG = true;
        getAndLoadTestConfiguration(TEST_NAME1);
        String HOME = SCRIPT_DIR + TEST_DIR;
        fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
        programArgs = new String[] { "-explain", "-nvargs", "DATA=" + HOME + "input/" + DATASET, "TFSPEC=" + HOME + "input/" + SPEC, "TFDATA=" + output("tfout"), "SEP=,", "OFMT=" + ofmt, "OSEP=," };
        // Originally OSEP was set to
        // OSEP=","
        // Apache Commons CLI strips away the leading and trailing quotes, leaving us with
        // OSEP=",
        // This is just a feature/bug and is reported in CLI-262,
        // though even a fix is unlikely to be backported to 1.2
        runTest(true, false, null, -1);
        // read the input (first CSV properties flag set to true) and the script output, then compare
        FrameReader reader1 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, new CSVFileFormatProperties(true, ",", false));
        FrameBlock fb1 = reader1.readFrameFromHDFS(HOME + "input/" + DATASET, -1L, -1L);
        FrameReader reader2 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo);
        FrameBlock fb2 = reader2.readFrameFromHDFS(output("tfout"), -1L, -1L);
        String[][] R1 = DataConverter.convertToStringFrame(fb1);
        String[][] R2 = DataConverter.convertToStringFrame(fb2);
        TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);
        if (rt == RUNTIME_PLATFORM.HYBRID_SPARK) {
            // exactly two Spark instructions are expected in hybrid Spark mode;
            // Long.valueOf replaces the deprecated Long constructor
            Assert.assertEquals("Wrong number of executed Spark instructions: " + Statistics.getNoOfExecutedSPInst(), Long.valueOf(2L), Long.valueOf(Statistics.getNoOfExecutedSPInst()));
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        rtplatform = rtold;
        DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
}
Also used : RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader)

Example 14 with FrameReader

use of org.apache.sysml.runtime.io.FrameReader in project incubator-systemml by apache.

the class Connection method convertToFrame.

/**
 * Converts an input stream of a frame in csv or textcell format
 * into a frame block.
 *
 * @param input InputStream to a string frame in csv or textcell format
 * @param rows number of rows in the frame
 * @param cols number of columns in the frame
 * @param format input format of the given stream
 * @return frame as a frame block
 * @throws IOException if IOException occurs
 */
public FrameBlock convertToFrame(InputStream input, int rows, int cols, String format) throws IOException {
    // guard: only the text, matrix market, and csv format constants are accepted
    if (!DataExpression.FORMAT_TYPE_VALUE_TEXT.equals(format)
        && !DataExpression.FORMAT_TYPE_VALUE_MATRIXMARKET.equals(format)
        && !DataExpression.FORMAT_TYPE_VALUE_CSV.equals(format)) {
        throw new IOException("Invalid input format (expected: csv, text or mm): " + format);
    }
    setLocalConfigs();
    try {
        // csv is read with the CSV input info; every other accepted format as text cell
        final InputInfo inputInfo;
        if (DataExpression.FORMAT_TYPE_VALUE_CSV.equals(format)) {
            inputInfo = InputInfo.CSVInputInfo;
        } else {
            inputInfo = InputInfo.TextCellInputInfo;
        }
        // read the frame straight from the stream
        return FrameReaderFactory.createFrameReader(inputInfo).readFrameFromInputStream(input, rows, cols);
    } catch (DMLRuntimeException rex) {
        // surface runtime failures through the declared IOException, keeping the cause
        throw new IOException(rex);
    }
}
Also used : InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) IOException(java.io.IOException) FrameReader(org.apache.sysml.runtime.io.FrameReader) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 15 with FrameReader

use of org.apache.sysml.runtime.io.FrameReader in project incubator-systemml by apache.

the class FrameMatrixWriteTest method readFrameInput.

/**
 * Reads a frame from the given file and returns its content as a double matrix.
 *
 * @param fname name of the file to read the frame from
 * @param ofmt external format name used to select the frame reader
 * @param rows row count passed to the reader
 * @param cols column count passed to the reader
 * @return the frame converted to a matrix block and then to a {@code double[][]}
 * @throws IOException if reading the frame fails
 */
private static double[][] readFrameInput(String fname, String ofmt, int rows, int cols) throws IOException {
    // pick the reader matching the given format and load the frame
    InputInfo inputInfo = InputInfo.stringExternalToInputInfo(ofmt);
    FrameBlock frame = FrameReaderFactory.createFrameReader(inputInfo).readFrameFromHDFS(fname, rows, cols);
    // frame -> matrix block -> double[][]
    return DataConverter.convertToDoubleMatrix(DataConverter.convertToMatrixBlock(frame));
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameReader(org.apache.sysml.runtime.io.FrameReader)

Aggregations

FrameReader (org.apache.sysml.runtime.io.FrameReader)21 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)18 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)8 CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties)8 IOException (java.io.IOException)7 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)5 FrameWriter (org.apache.sysml.runtime.io.FrameWriter)5 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)5 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)5 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)3 ValueType (org.apache.sysml.parser.Expression.ValueType)2 MatrixReader (org.apache.sysml.runtime.io.MatrixReader)2 MatrixWriter (org.apache.sysml.runtime.io.MatrixWriter)2 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)1 DMLException (org.apache.sysml.api.DMLException)1 LanguageException (org.apache.sysml.parser.LanguageException)1 ParseException (org.apache.sysml.parser.ParseException)1 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)1 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)1 RDDTransformApplyFunction (org.apache.sysml.runtime.instructions.spark.ParameterizedBuiltinSPInstruction.RDDTransformApplyFunction)1