Search in sources :

Example 31 with MatrixMetadata

use of org.apache.sysml.api.mlcontext.MatrixMetadata in project systemml by apache.

the class MLContextTest method testDataFrameGoodMetadataDML.

/**
 * Feeds a DataFrame with three nullable double columns ({@code C1}..{@code C3}),
 * accompanied by an explicit {@link MatrixMetadata}, into a DML script as input
 * {@code M} and expects the script to print the sum of all nine cells.
 */
@Test
public void testDataFrameGoodMetadataDML() {
    System.out.println("MLContextTest - DataFrame good metadata DML");

    // Source data: three comma-separated lines, mapped to Rows.
    List<String> csvLines = new ArrayList<>();
    csvLines.add("10,20,30");
    csvLines.add("40,50,60");
    csvLines.add("70,80,90");
    JavaRDD<Row> rows = sc.parallelize(csvLines).map(new CommaSeparatedValueStringToDoubleArrayRow());

    // Schema: one nullable double field per column name.
    List<StructField> columns = new ArrayList<>();
    for (String columnName : new String[] { "C1", "C2", "C3" }) {
        columns.add(DataTypes.createStructField(columnName, DataTypes.DoubleType, true));
    }
    Dataset<Row> dataFrame = spark.createDataFrame(rows, DataTypes.createStructType(columns));

    // NOTE(review): arguments presumably are rows, columns, non-zeros - confirm against MatrixMetadata.
    MatrixMetadata metadata = new MatrixMetadata(3, 3, 9);
    Script sumScript = dml("print('sum: ' + sum(M));").in("M", dataFrame, metadata);

    // 10 + 20 + ... + 90 = 450
    setExpectedStdOut("sum: 450.0");
    ml.execute(sumScript);
}
Also used : Script(org.apache.sysml.api.mlcontext.Script) StructType(org.apache.spark.sql.types.StructType) ArrayList(java.util.ArrayList) StructField(org.apache.spark.sql.types.StructField) Row(org.apache.spark.sql.Row) MatrixMetadata(org.apache.sysml.api.mlcontext.MatrixMetadata) Test(org.junit.Test)

Example 32 with MatrixMetadata

use of org.apache.sysml.api.mlcontext.MatrixMetadata in project systemml by apache.

the class MLContextTest method testDataFrameSumPYDMLMllibVectorWithIDColumn.

/**
 * Builds a DataFrame whose rows pair a double ID with an mllib dense vector,
 * passes it to a PYDML script as input {@code M} using the
 * {@code DF_VECTOR_WITH_INDEX} matrix format, and expects the printed sum of
 * the vector values.
 */
@Test
public void testDataFrameSumPYDMLMllibVectorWithIDColumn() {
    System.out.println("MLContextTest - DataFrame sum PYDML, mllib vector with ID column");

    // (id, vector) pairs; the vector values 1..9 add up to 45.
    List<Tuple2<Double, org.apache.spark.mllib.linalg.Vector>> idVectorPairs = new ArrayList<>();
    idVectorPairs.add(new Tuple2<Double, org.apache.spark.mllib.linalg.Vector>(1.0,
            org.apache.spark.mllib.linalg.Vectors.dense(1.0, 2.0, 3.0)));
    idVectorPairs.add(new Tuple2<Double, org.apache.spark.mllib.linalg.Vector>(2.0,
            org.apache.spark.mllib.linalg.Vectors.dense(4.0, 5.0, 6.0)));
    idVectorPairs.add(new Tuple2<Double, org.apache.spark.mllib.linalg.Vector>(3.0,
            org.apache.spark.mllib.linalg.Vectors.dense(7.0, 8.0, 9.0)));
    JavaRDD<Row> rows = sc.parallelize(idVectorPairs).map(new DoubleMllibVectorRow());

    // Schema: the ID column followed by a single vector column.
    List<StructField> columns = new ArrayList<>();
    columns.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
    columns.add(DataTypes.createStructField("C1", new org.apache.spark.mllib.linalg.VectorUDT(), true));
    Dataset<Row> dataFrame = spark.createDataFrame(rows, DataTypes.createStructType(columns));

    MatrixMetadata metadata = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_INDEX);
    Script sumScript = pydml("print('sum: ' + sum(M))").in("M", dataFrame, metadata);

    setExpectedStdOut("sum: 45.0");
    ml.execute(sumScript);
}
Also used : Script(org.apache.sysml.api.mlcontext.Script) VectorUDT(org.apache.spark.ml.linalg.VectorUDT) StructType(org.apache.spark.sql.types.StructType) ArrayList(java.util.ArrayList) StructField(org.apache.spark.sql.types.StructField) Tuple2(scala.Tuple2) Row(org.apache.spark.sql.Row) MatrixMetadata(org.apache.sysml.api.mlcontext.MatrixMetadata) Vector(org.apache.spark.ml.linalg.Vector) DenseVector(org.apache.spark.ml.linalg.DenseVector) Test(org.junit.Test)

Example 33 with MatrixMetadata

use of org.apache.sysml.api.mlcontext.MatrixMetadata in project systemml by apache.

the class MLContextFrameTest method testInputFrameAndMatrixOutputMatrix.

/**
 * Passes a frame {@code A} (string + double columns) and a single-column
 * matrix {@code B} as DataFrames into a DML script that transform-encodes
 * {@code A}, multiplies the encoded result by {@code B}, scales it by the
 * scalar {@code s}, and returns matrix {@code M}. The first column of
 * {@code M} is then compared against the expected values.
 */
@Test
public void testInputFrameAndMatrixOutputMatrix() {
    System.out.println("MLContextFrameTest - input frame and matrix, output matrix");

    // Raw text for the frame input A.
    List<String> frameLines = new ArrayList<>();
    frameLines.add("Test1,4.0");
    frameLines.add("Test2,5.0");
    frameLines.add("Test3,6.0");
    JavaRDD<String> frameLinesRdd = sc.parallelize(frameLines);
    ValueType[] schema = { ValueType.STRING, ValueType.DOUBLE };

    // Raw text for the matrix input B.
    List<String> matrixLines = new ArrayList<>();
    matrixLines.add("1.0");
    matrixLines.add("2.0");
    JavaRDD<String> matrixLinesRdd = sc.parallelize(matrixLines);

    JavaRDD<Row> frameRows = FrameRDDConverterUtils.csvToRowRDD(sc, frameLinesRdd, CSV_DELIM, schema);
    JavaRDD<Row> matrixRows = matrixLinesRdd.map(new CommaSeparatedValueStringToDoubleArrayRow());

    // DataFrame A: columns "1" (string) and "2" (double).
    List<StructField> frameColumns = new ArrayList<>();
    frameColumns.add(DataTypes.createStructField("1", DataTypes.StringType, true));
    frameColumns.add(DataTypes.createStructField("2", DataTypes.DoubleType, true));
    Dataset<Row> dataFrameA = spark.createDataFrame(frameRows, DataTypes.createStructType(frameColumns));

    // DataFrame B: a single double column "1".
    List<StructField> matrixColumns = new ArrayList<>();
    matrixColumns.add(DataTypes.createStructField("1", DataTypes.DoubleType, true));
    Dataset<Row> dataFrameB = spark.createDataFrame(matrixRows, DataTypes.createStructType(matrixColumns));

    String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: true ,recode: [ 1, 2 ]}\");\n"
            + "C = tA %*% B;\n"
            + "M = s * C;";

    // Dimensions are taken from the DataFrames themselves.
    FrameMetadata frameMetadata = new FrameMetadata(FrameFormat.CSV, dataFrameA.count(),
            (long) dataFrameA.columns().length);
    MatrixMetadata matrixMetadata = new MatrixMetadata(MatrixFormat.CSV, dataFrameB.count(),
            (long) dataFrameB.columns().length);

    Script script = dml(dmlString)
            .in("A", dataFrameA, frameMetadata)
            .in("B", dataFrameB, matrixMetadata)
            .in("s", 2)
            .out("M");
    MLResults results = ml.execute(script);

    double[][] product = results.getMatrixAs2DDoubleArray("M");
    double[] expectedFirstColumn = { 6.0, 12.0, 18.0 };
    for (int row = 0; row < expectedFirstColumn.length; row++) {
        Assert.assertEquals(expectedFirstColumn[row], product[row][0], 0.0);
    }
}
Also used : Script(org.apache.sysml.api.mlcontext.Script) StructType(org.apache.spark.sql.types.StructType) ValueType(org.apache.sysml.parser.Expression.ValueType) MLResults(org.apache.sysml.api.mlcontext.MLResults) ArrayList(java.util.ArrayList) CommaSeparatedValueStringToDoubleArrayRow(org.apache.sysml.test.integration.mlcontext.MLContextTest.CommaSeparatedValueStringToDoubleArrayRow) StructField(org.apache.spark.sql.types.StructField) Row(org.apache.spark.sql.Row) MatrixMetadata(org.apache.sysml.api.mlcontext.MatrixMetadata) FrameMetadata(org.apache.sysml.api.mlcontext.FrameMetadata) Test(org.junit.Test)

Example 34 with MatrixMetadata

use of org.apache.sysml.api.mlcontext.MatrixMetadata in project systemml by apache.

the class MLContextOutputBlocksizeTest method runMLContextOutputBlocksizeTest.

/**
 * Runs a small DML script on a binary-block matrix input and asserts that
 * the output matrix {@code R} reports the configured block size for both
 * rows and columns.
 *
 * @param format value inserted into the {@code format=} argument of the
 *               script's {@code write} call
 */
private void runMLContextOutputBlocksizeTest(String format) {
    try {
        // Random input data, generated with a fixed seed.
        double[][] values = getRandomMatrix(rows, cols, -10, 10, sparsity, 76543);
        MatrixBlock inputBlock = DataConverter.convertToMatrixBlock(values);
        int blksz = ConfigurationManager.getBlocksize();
        MatrixCharacteristics inputMc = new MatrixCharacteristics(rows, cols, blksz, blksz,
                inputBlock.getNonZeros());

        // Wrap the data as a binary-block RDD carrying its characteristics.
        JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = SparkExecutionContext.toMatrixJavaPairRDD(sc,
                inputBlock, blksz, blksz);
        Matrix input = new Matrix(blocks, new MatrixMetadata(inputMc));

        ml.setExplain(true);
        ml.setExplainLevel(ExplainLevel.HOPS);

        // Build and execute the script.
        String source = "if( sum(X) > 0 )"
                + "   X = X/2;"
                + "R = X;"
                + "write(R, \"/tmp\", format=\"" + format + "\");";
        MLResults results = ml.execute(dml(source).in("X", input).out("R"));

        // The output must carry the same block size as the configuration.
        MatrixCharacteristics outputMc = results.getMatrix("R").getMatrixMetadata().asMatrixCharacteristics();
        Assert.assertEquals(blksz, outputMc.getRowsPerBlock());
        Assert.assertEquals(blksz, outputMc.getColsPerBlock());
    } catch (Exception failure) {
        failure.printStackTrace();
        throw new RuntimeException(failure);
    }
}
Also used : Script(org.apache.sysml.api.mlcontext.Script) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MLResults(org.apache.sysml.api.mlcontext.MLResults) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) Matrix(org.apache.sysml.api.mlcontext.Matrix) MatrixMetadata(org.apache.sysml.api.mlcontext.MatrixMetadata)

Example 35 with MatrixMetadata

use of org.apache.sysml.api.mlcontext.MatrixMetadata in project systemml by apache.

the class GNMFTest method testGNMFWithRDMLAndJava.

/**
 * Runs the GNMF script through the MLContext API and compares the resulting
 * {@code w} and {@code h} matrices with the output of the corresponding R
 * script.
 * <p>
 * Depending on {@code numRegisteredInputs}, up to three inputs (V, W, H) are
 * passed as IJV-formatted string RDDs; depending on
 * {@code numRegisteredOutputs}, up to two outputs (H, W) are registered,
 * fetched from the results and written to the test's output locations.
 * {@code DMLScript.rtplatform} and {@code DMLScript.USE_LOCAL_SPARK_CONFIG}
 * are overridden for the run and restored in a {@code finally} block.
 *
 * @throws IOException    declared by the test signature
 * @throws DMLException   if deleting a previous output file fails (raised as
 *                        {@code DMLRuntimeException} with the I/O failure as cause)
 * @throws ParseException declared by the test signature
 */
@Test
public void testGNMFWithRDMLAndJava() throws IOException, DMLException, ParseException {
    System.out.println("------------ BEGIN " + TEST_NAME + " TEST {" + numRegisteredInputs + ", " + numRegisteredOutputs + "} ------------");
    this.scriptType = ScriptType.DML;
    // Problem dimensions: v is m x n, w is m x k, h is k x n.
    int m = 2000;
    int n = 1500;
    int k = 50;
    int maxiter = 2;
    // Small constant added to denominators in the update rules below.
    double Eps = Math.pow(10, -8);
    getAndLoadTestConfiguration(TEST_NAME);
    // Positional script arguments: three inputs, iteration count, two outputs.
    List<String> proArgs = new ArrayList<String>();
    proArgs.add(input("v"));
    proArgs.add(input("w"));
    proArgs.add(input("h"));
    proArgs.add(Integer.toString(maxiter));
    proArgs.add(output("w"));
    proArgs.add(output("h"));
    programArgs = proArgs.toArray(new String[proArgs.size()]);
    fullDMLScriptName = getScript();
    rCmd = getRCmd(inputDir(), Integer.toString(maxiter), expectedDir());
    // Inputs are seeded from the clock, so the data differs between runs.
    double[][] v = getRandomMatrix(m, n, 1, 5, 0.2, System.currentTimeMillis());
    double[][] w = getRandomMatrix(m, k, 0, 1, 1, System.currentTimeMillis());
    double[][] h = getRandomMatrix(k, n, 0, 1, 1, System.currentTimeMillis());
    writeInputMatrixWithMTD("v", v, true);
    writeInputMatrixWithMTD("w", w, true);
    writeInputMatrixWithMTD("h", h, true);
    // In-Java update of h and w for maxiter iterations.
    // NOTE(review): the updated w/h arrays are not read again in this method;
    // the comparison below uses the DML and R outputs only - confirm whether
    // this loop is still needed.
    for (int i = 0; i < maxiter; i++) {
        double[][] tW = TestUtils.performTranspose(w);
        double[][] tWV = TestUtils.performMatrixMultiplication(tW, v);
        double[][] tWW = TestUtils.performMatrixMultiplication(tW, w);
        double[][] tWWH = TestUtils.performMatrixMultiplication(tWW, h);
        for (int j = 0; j < k; j++) {
            for (int l = 0; l < n; l++) {
                h[j][l] = h[j][l] * (tWV[j][l] / (tWWH[j][l] + Eps));
            }
        }
        double[][] tH = TestUtils.performTranspose(h);
        double[][] vTH = TestUtils.performMatrixMultiplication(v, tH);
        double[][] hTH = TestUtils.performMatrixMultiplication(h, tH);
        double[][] wHTH = TestUtils.performMatrixMultiplication(w, hTH);
        for (int j = 0; j < m; j++) {
            for (int l = 0; l < k; l++) {
                w[j][l] = w[j][l] * (vTH[j][l] / (wHTH[j][l] + Eps));
            }
        }
    }
    // Remember the global settings so the finally block can restore them.
    boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    DMLScript.USE_LOCAL_SPARK_CONFIG = true;
    RUNTIME_PLATFORM oldRT = DMLScript.rtplatform;
    try {
        DMLScript.rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;
        Script script = ScriptFactory.dmlFromFile(fullDMLScriptName);
        // set positional argument values
        for (int argNum = 1; argNum <= proArgs.size(); argNum++) {
            script.in("$" + argNum, proArgs.get(argNum - 1));
        }
        // Read two matrices through RDD and one through HDFS
        if (numRegisteredInputs >= 1) {
            JavaRDD<String> vIn = sc.sc().textFile(input("v"), 2).toJavaRDD();
            MatrixMetadata mm = new MatrixMetadata(MatrixFormat.IJV, m, n);
            script.in("V", vIn, mm);
        }
        if (numRegisteredInputs >= 2) {
            JavaRDD<String> wIn = sc.sc().textFile(input("w"), 2).toJavaRDD();
            MatrixMetadata mm = new MatrixMetadata(MatrixFormat.IJV, m, k);
            script.in("W", wIn, mm);
        }
        if (numRegisteredInputs >= 3) {
            JavaRDD<String> hIn = sc.sc().textFile(input("h"), 2).toJavaRDD();
            MatrixMetadata mm = new MatrixMetadata(MatrixFormat.IJV, k, n);
            script.in("H", hIn, mm);
        }
        // Output one matrix to HDFS and get one as RDD
        if (numRegisteredOutputs >= 1) {
            script.out("H");
        }
        if (numRegisteredOutputs >= 2) {
            script.out("W");
            ml.setConfigProperty(DMLConfig.CP_PARALLEL_OPS, "false");
        }
        MLResults results = ml.execute(script);
        if (numRegisteredOutputs >= 2) {
            // The property set above must be visible in the active configuration.
            String configStr = ConfigurationManager.getDMLConfig().getConfigInfo();
            if (configStr.contains("cp.parallel.ops: true"))
                Assert.fail("Configuration not updated via setConfig");
        }
        if (numRegisteredOutputs >= 1) {
            RDD<String> hOut = results.getRDDStringIJV("H");
            String fName = output("h");
            try {
                MapReduceTool.deleteFileIfExistOnHDFS(fName);
            } catch (IOException e) {
                // Keep the original I/O failure as the cause for diagnosis.
                throw new DMLRuntimeException("Error: While deleting file on HDFS", e);
            }
            hOut.saveAsTextFile(fName);
        }
        if (numRegisteredOutputs >= 2) {
            // Convert W: IJV strings -> matrix entries -> coordinate matrix
            // -> binary blocks -> text cells, then save as text.
            JavaRDD<String> javaRDDStringIJV = results.getJavaRDDStringIJV("W");
            JavaRDD<MatrixEntry> matRDD = javaRDDStringIJV.map(new StringToMatrixEntry());
            Matrix matrix = results.getMatrix("W");
            MatrixCharacteristics mcW = matrix.getMatrixMetadata().asMatrixCharacteristics();
            CoordinateMatrix coordinateMatrix = new CoordinateMatrix(matRDD.rdd(), mcW.getRows(), mcW.getCols());
            JavaPairRDD<MatrixIndexes, MatrixBlock> binaryRDD = RDDConverterUtilsExt.coordinateMatrixToBinaryBlock(sc, coordinateMatrix, mcW, true);
            JavaRDD<String> wOut = RDDConverterUtils.binaryBlockToTextCell(binaryRDD, mcW);
            String fName = output("w");
            try {
                MapReduceTool.deleteFileIfExistOnHDFS(fName);
            } catch (IOException e) {
                // Keep the original I/O failure as the cause for diagnosis.
                throw new DMLRuntimeException("Error: While deleting file on HDFS", e);
            }
            wOut.saveAsTextFile(fName);
        }
        runRScript(true);
        // compare matrices
        HashMap<CellIndex, Double> hmWDML = readDMLMatrixFromHDFS("w");
        HashMap<CellIndex, Double> hmHDML = readDMLMatrixFromHDFS("h");
        HashMap<CellIndex, Double> hmWR = readRMatrixFromFS("w");
        HashMap<CellIndex, Double> hmHR = readRMatrixFromFS("h");
        TestUtils.compareMatrices(hmWDML, hmWR, 0.000001, "hmWDML", "hmWR");
        TestUtils.compareMatrices(hmHDML, hmHR, 0.000001, "hmHDML", "hmHR");
    } finally {
        // Restore global state even if the test fails.
        DMLScript.rtplatform = oldRT;
        DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MLResults(org.apache.sysml.api.mlcontext.MLResults) ArrayList(java.util.ArrayList) MatrixEntry(org.apache.spark.mllib.linalg.distributed.MatrixEntry) CoordinateMatrix(org.apache.spark.mllib.linalg.distributed.CoordinateMatrix) Matrix(org.apache.sysml.api.mlcontext.Matrix) CellIndex(org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex) MatrixMetadata(org.apache.sysml.api.mlcontext.MatrixMetadata) Script(org.apache.sysml.api.mlcontext.Script) DMLScript(org.apache.sysml.api.DMLScript) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) Test(org.junit.Test)

Aggregations

MatrixMetadata (org.apache.sysml.api.mlcontext.MatrixMetadata)72 Script (org.apache.sysml.api.mlcontext.Script)68 Test (org.junit.Test)68 ArrayList (java.util.ArrayList)60 Row (org.apache.spark.sql.Row)36 StructField (org.apache.spark.sql.types.StructField)34 StructType (org.apache.spark.sql.types.StructType)34 DenseVector (org.apache.spark.ml.linalg.DenseVector)16 Vector (org.apache.spark.ml.linalg.Vector)16 VectorUDT (org.apache.spark.ml.linalg.VectorUDT)16 MLResults (org.apache.sysml.api.mlcontext.MLResults)12 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)10 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)10 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)10 Matrix (org.apache.sysml.api.mlcontext.Matrix)8 Tuple2 (scala.Tuple2)8 URL (java.net.URL)4 List (java.util.List)4 Tuple3 (scala.Tuple3)4 Seq (scala.collection.Seq)4