Search in sources:

Example 6 with CorrelationResult

Use of com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult in the project Alink by Alibaba.

From the class StatisticsHelperTest, method pearsonCorrelation.

/**
 * Runs {@code StatisticsHelper.pearsonCorrelation} on the columns {@code f_long},
 * {@code f_int} and {@code f_double} of the batch test table and checks both the
 * returned table summary and the 3x3 correlation matrix.
 *
 * @throws Exception if computing or collecting the statistics fails
 */
@Test
public void pearsonCorrelation() throws Exception {
    // Absolute tolerance for floating-point comparisons (10e-4 == 1e-3).
    final double delta = 1e-3;
    BatchOperator<?> data = getBatchTable();
    String[] selectedColNames = new String[] { "f_long", "f_int", "f_double" };
    DataSet<Tuple2<TableSummary, CorrelationResult>> dataSet = StatisticsHelper.pearsonCorrelation(data, selectedColNames);
    Tuple2<TableSummary, CorrelationResult> tuple2 = dataSet.collect().get(0);
    TableSummary summary = tuple2.f0;
    CorrelationResult corr = tuple2.f1;

    // JUnit expects (expected, actual); keeping that order makes failure messages truthful.
    assertArrayEquals(selectedColNames, summary.getColNames());
    assertEquals(4, summary.count());
    assertEquals(4.0, summary.max("f_double"), delta);
    assertEquals(0.0, summary.min("f_int"), delta);
    assertEquals(1.25, summary.mean("f_double"), delta);
    assertEquals(8.9167, summary.variance("f_double"), delta);
    assertEquals(2.9861, summary.standardDeviation("f_double"), delta);
    assertEquals(11.0, summary.normL1("f_double"), delta);
    assertEquals(5.7446, summary.normL2("f_double"), delta);

    // Expected correlation matrix, flattened to one dimension for comparison.
    double[] expectedCorrelation = new double[] { 1.0, 0.6325, -0.9570, 0.6325, 1.0, -0.4756, -0.9570, -0.4756, 1.0 };
    assertArrayEquals(expectedCorrelation, corr.getCorrelationMatrix().getArrayCopy1D(true), delta);
}
Also used : Tuple2(org.apache.flink.api.java.tuple.Tuple2) TableSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.TableSummary) CorrelationResult(com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Test(org.junit.Test)

Example 7 with CorrelationResult

Use of com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult in the project Alink by Alibaba.

From the class StatisticsHelperTest, method vectorPearsonCorrelation.

/**
 * Runs {@code StatisticsHelper.vectorPearsonCorrelation} on the {@code vec} column of the
 * dense batch test data and checks both the returned vector summary and the 3x3
 * correlation matrix.
 *
 * @throws Exception if computing or collecting the statistics fails
 */
@Test
public void vectorPearsonCorrelation() throws Exception {
    // Absolute tolerance for floating-point comparisons (10e-4 == 1e-3).
    final double delta = 1e-3;
    BatchOperator<?> data = getDenseBatch();
    String vectorColName = "vec";
    DataSet<Tuple2<BaseVectorSummary, CorrelationResult>> dataSet = StatisticsHelper.vectorPearsonCorrelation(data, vectorColName);
    Tuple2<BaseVectorSummary, CorrelationResult> tuple2 = dataSet.collect().get(0);
    BaseVectorSummary summary = tuple2.f0;
    CorrelationResult corr = tuple2.f1;

    // JUnit expects (expected, actual); keeping that order makes failure messages truthful.
    assertEquals(3, summary.vectorSize());
    assertEquals(4, summary.count());
    assertEquals(4.0, summary.max(2), delta);
    assertEquals(0.0, summary.min(1), delta);
    assertEquals(1.25, summary.mean(2), delta);
    assertEquals(8.9167, summary.variance(2), delta);
    assertEquals(2.9861, summary.standardDeviation(2), delta);
    assertEquals(11.0, summary.normL1(2), delta);
    assertEquals(5.7446, summary.normL2(2), delta);

    // Expected correlation matrix, flattened to one dimension for comparison.
    double[] expectedCorrelation = new double[] { 1.0, 0.6325, -0.9570, 0.6325, 1.0, -0.4756, -0.9570, -0.4756, 1.0 };
    assertArrayEquals(expectedCorrelation, corr.getCorrelationMatrix().getArrayCopy1D(true), delta);
}
Also used : Tuple2(org.apache.flink.api.java.tuple.Tuple2) BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) CorrelationResult(com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Test(org.junit.Test)

Example 8 with CorrelationResult

Use of com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult in the project Alink by Alibaba.

From the class VectorCorrelationBatchOpTest, method test.

/**
 * Feeds three two-element vectors through a {@code VectorCorrelationBatchOp} configured
 * with the "PEARSON" method and checks the collected 2x2 correlation matrix.
 */
@Test
public void test() {
    Row[] testArray = new Row[] { Row.of("1.0 2.0"), Row.of("-1.0 -3.0"), Row.of("4.0 2.0") };
    String selectedColName = "vec";
    String[] colNames = new String[] { selectedColName };
    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);
    // Reuse selectedColName so the source schema and the selected column cannot drift apart.
    VectorCorrelationBatchOp corr = new VectorCorrelationBatchOp().setSelectedCol(selectedColName).setMethod("PEARSON");
    corr.linkFrom(source);
    CorrelationResult corrMat = corr.collectCorrelation();
    System.out.println(corrMat);

    // JUnit expects (expected, actual); keeping that order makes failure messages truthful.
    double[] expectedCorrelation = new double[] { 1.0, 0.802955068546966, 0.802955068546966, 1.0 };
    Assert.assertArrayEquals(expectedCorrelation, corrMat.getCorrelationMatrix().getArrayCopy1D(true), 1e-3);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) Row(org.apache.flink.types.Row) CorrelationResult(com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult) Test(org.junit.Test)

Example 9 with CorrelationResult

Use of com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult in the project Alink by Alibaba.

From the class Chap08, method c_3_2.

/**
 * Computes column correlations for the CSV data set at {@code DATA_DIR + ORIGIN_FILE} and
 * prints them several ways: collected directly, through a lazy collect callback, and
 * through the lazy print helpers for the default method and for Spearman.
 *
 * @throws Exception if collecting the correlation or executing the batch job fails
 */
static void c_3_2() throws Exception {
    CsvSourceBatchOp source = new CsvSourceBatchOp().setFilePath(DATA_DIR + ORIGIN_FILE).setSchemaStr(SCHEMA_STRING);

    // Direct path: the result is collected here and printed immediately.
    CorrelationResult correlation = new CorrelationBatchOp().linkFrom(source).collectCorrelation();
    printFirstPairCorrelation(correlation);

    // Callback path: the same report is produced by the registered consumer.
    source.link(new CorrelationBatchOp().lazyCollectCorrelation(result -> printFirstPairCorrelation(result)));

    source.link(new CorrelationBatchOp().lazyPrintCorrelation("< Pearson Correlation >"));
    source.link(new CorrelationBatchOp().setMethod(Method.SPEARMAN).lazyPrintCorrelation("< Spearman Correlation >"));
    // NOTE(review): the lazy* registrations are presumably triggered by this call — confirm.
    BatchOperator.execute();
}

/**
 * Prints the correlation between the first two columns of {@code result}, followed by the
 * full correlation matrix.
 *
 * @param result correlation result to report; must describe at least two columns
 */
private static void printFirstPairCorrelation(CorrelationResult result) {
    String[] names = result.getColNames();
    System.out.print("Correlation of " + names[0] + " with " + names[1]);
    System.out.println(" is " + result.getCorrelation()[0][1]);
    System.out.println(result.getCorrelationMatrix());
}
Also used : Consumer(java.util.function.Consumer) CorrelationResult(com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp) CorrelationBatchOp(com.alibaba.alink.operator.batch.statistics.CorrelationBatchOp)

Aggregations

CorrelationResult (com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult): 9, Row (org.apache.flink.types.Row): 6, Test (org.junit.Test): 6, BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 4, MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 4, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 4, TableSourceBatchOp (com.alibaba.alink.operator.batch.source.TableSourceBatchOp): 2, BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary): 2, CorrelationDataConverter (com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationDataConverter): 2, TableSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.TableSummary): 2, CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp): 1, CorrelationBatchOp (com.alibaba.alink.operator.batch.statistics.CorrelationBatchOp): 1, Consumer (java.util.function.Consumer): 1, TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 1