Search in sources :

Example 21 with BaseVectorSummary

use of com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary in project Alink by alibaba.

the class StatisticsHelperTest method dataSetSummary.

/**
 * Checks the statistics reported by {@code StatisticsHelper.summary} for the vectors
 * read from field 1 of every row of the batch returned by {@code getDenseBatch()}.
 *
 * @throws Exception propagated from the data set calls made in the body
 */
@Test
public void dataSetSummary() throws Exception {
    // Wildcard instead of a raw type so that getDataSet() keeps its element type.
    BatchOperator<?> data = getDenseBatch();
    DataSet<BaseVectorSummary> dataSet = StatisticsHelper.summary(data.getDataSet().map(new MapFunction<Row, Vector>() {

        private static final long serialVersionUID = -6512822331768742553L;

        @Override
        public Vector map(Row in) throws Exception {
            // Field 1 of each row carries the vector to summarize.
            return VectorUtil.getVector(in.getField(1));
        }
    }));
    BaseVectorSummary summary = dataSet.collect().get(0);

    // JUnit's signature is assertEquals(expected, actual[, delta]); keeping the literal
    // first makes a failure message report the expected and actual values correctly.
    assertEquals(3, summary.vectorSize());
    assertEquals(4, summary.count());
    assertEquals(4.0, summary.max(2), 10e-4);
    assertEquals(0.0, summary.min(1), 10e-4);
    assertEquals(1.25, summary.mean(2), 10e-4);
    assertEquals(8.9167, summary.variance(2), 10e-4);
    assertEquals(2.9861, summary.standardDeviation(2), 10e-4);
    assertEquals(11.0, summary.normL1(2), 10e-4);
    assertEquals(5.7446, summary.normL2(2), 10e-4);
}
Also used : BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) Row(org.apache.flink.types.Row) MapFunction(org.apache.flink.api.common.functions.MapFunction) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Test(org.junit.Test)

Example 22 with BaseVectorSummary

use of com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary in project Alink by alibaba.

the class VectorSummarizerBatchOpTest method test.

/**
 * Summarizes a single vector column holding three two-dimensional vectors with
 * {@code VectorSummarizerBatchOp} and checks the statistics of component 0
 * (values 1.0, -1.0 and 4.0).
 */
@Test
public void test() {
    Row[] testArray = new Row[] { Row.of("1.0 2.0"), Row.of("-1.0 -3.0"), Row.of("4.0 2.0") };
    String selectedColName = "vec";
    String[] colNames = new String[] { selectedColName };
    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);
    // Reuse the column-name variable so the source schema and the selected column cannot drift apart.
    VectorSummarizerBatchOp summarizer = new VectorSummarizerBatchOp().setSelectedCol(selectedColName);
    summarizer.linkFrom(source);
    BaseVectorSummary srt = summarizer.collectVectorSummary();
    // Printing also invokes the summary's toString().
    System.out.println(srt);

    // JUnit's signature is assertEquals(expected, actual[, delta]); keeping the literal
    // first makes a failure message report the expected and actual values correctly.
    Assert.assertEquals(2, srt.vectorSize());
    Assert.assertEquals(3, srt.count());
    Assert.assertEquals(4.0, srt.max(0), 10e-4);
    Assert.assertEquals(-1.0, srt.min(0), 10e-4);
    Assert.assertEquals(1.3333333333333333, srt.mean(0), 10e-4);
    Assert.assertEquals(6.333333333333334, srt.variance(0), 10e-4);
    Assert.assertEquals(2.5166114784235836, srt.standardDeviation(0), 10e-4);
    Assert.assertEquals(6.0, srt.normL1(0), 10e-4);
    Assert.assertEquals(4.242640687119285, srt.normL2(0), 10e-4);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) Row(org.apache.flink.types.Row) Test(org.junit.Test)

Example 23 with BaseVectorSummary

use of com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary in project Alink by alibaba.

the class PCATest method testSparse.

/**
 * Fits a three-component PCA (calculation type "CORR") on a vector column whose values
 * are written as space-separated {@code index:value} pairs, transforms the same data,
 * and registers a lazy check on the first component sum of the "pred" column.
 */
private void testSparse() {
    Object[][] rows = new Object[][] {
        { 1, "0:0.1 1:0.2 2:0.3 3:0.4" },
        { 2, "0:0.2 1:0.1 2:0.2 3:0.6" },
        { 3, "0:0.2 1:0.3 2:0.5 3:0.4" },
        { 4, "0:0.3 1:0.1 2:0.3 3:0.7" },
        { 5, "0:0.4 1:0.2 2:0.4 3:0.4" }
    };
    String[] schema = new String[] { "id", "vec" };
    MemSourceBatchOp input = new MemSourceBatchOp(rows, schema);

    PCA estimator = new PCA()
        .setK(3)
        .setCalculationType("CORR")
        .setPredictionCol("pred")
        .setReservedCols("id")
        .setVectorCol("vec");
    estimator.enableLazyPrintModelInfo();

    PCAModel fitted = estimator.fit(input);
    BatchOperator<?> projected = fitted.transform(input);

    VectorSummarizerBatchOp predSummarizer = new VectorSummarizerBatchOp().setSelectedCol("pred");
    predSummarizer.linkFrom(projected);
    // The callback is only registered here; it runs when the lazy summary is produced.
    predSummarizer.lazyCollectVectorSummary(
        (BaseVectorSummary summary) ->
            Assert.assertEquals(3.4416913763379853E-15, Math.abs(summary.sum().get(0)), 10e-8));
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) VectorSummarizerBatchOp(com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp)

Example 24 with BaseVectorSummary

use of com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary in project Alink by alibaba.

the class PCATest method testTable.

/**
 * Fits a three-component PCA (calculation type "CORR") on the numeric columns
 * f0..f3, transforms the same data, and registers a lazy check on the first
 * component sum of the "pred" column.
 *
 * @throws Exception declared by the signature; kept for callers that rely on it
 */
public void testTable() throws Exception {
    Object[][] rows = new Object[][] {
        { 1, 0.1, 0.2, 0.3, 0.4 },
        { 2, 0.2, 0.1, 0.2, 0.6 },
        { 3, 0.2, 0.3, 0.5, 0.4 },
        { 4, 0.3, 0.1, 0.3, 0.7 },
        { 5, 0.4, 0.2, 0.4, 0.4 }
    };
    String[] schema = new String[] { "id", "f0", "f1", "f2", "f3" };
    MemSourceBatchOp input = new MemSourceBatchOp(rows, schema);

    PCA estimator = new PCA()
        .setK(3)
        .setCalculationType("CORR")
        .setPredictionCol("pred")
        .setReservedCols("id")
        .setSelectedCols("f0", "f1", "f2", "f3");
    estimator.enableLazyPrintModelInfo();

    PCAModel fitted = estimator.fit(input);
    BatchOperator<?> projected = fitted.transform(input);

    VectorSummarizerBatchOp predSummarizer = new VectorSummarizerBatchOp().setSelectedCol("pred");
    predSummarizer.linkFrom(projected);
    // The callback is only registered here; it runs when the lazy summary is produced.
    predSummarizer.lazyCollectVectorSummary(
        (BaseVectorSummary summary) ->
            Assert.assertEquals(3.1086244689504383E-15, Math.abs(summary.sum().get(0)), 10e-8));
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) VectorSummarizerBatchOp(com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp)

Aggregations

BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary): 24; Row (org.apache.flink.types.Row): 13; Vector (com.alibaba.alink.common.linalg.Vector): 11; DenseVector (com.alibaba.alink.common.linalg.DenseVector): 9; SparseVector (com.alibaba.alink.common.linalg.SparseVector): 9; BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 9; DataSet (org.apache.flink.api.java.DataSet): 9; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 8; Test (org.junit.Test): 8; ArrayList (java.util.ArrayList): 7; Params (org.apache.flink.ml.api.misc.param.Params): 5; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 4; IterativeComQueue (com.alibaba.alink.common.comqueue.IterativeComQueue): 3; AllReduce (com.alibaba.alink.common.comqueue.communication.AllReduce): 3; VectorSummarizerBatchOp (com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp): 3; LdaModelDataConverter (com.alibaba.alink.operator.common.clustering.LdaModelDataConverter): 3; MapFunction (org.apache.flink.api.common.functions.MapFunction): 3; RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction): 3; Configuration (org.apache.flink.configuration.Configuration): 3; DenseMatrix (com.alibaba.alink.common.linalg.DenseMatrix): 2