Search in sources :

Example 1 with CorrelationDataConverter

use of com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationDataConverter in project Alink by alibaba.

The method linkFrom of the class CorrelationBatchOp.

/**
 * Computes the correlation between the selected columns of the first input and publishes
 * the result as this operator's output.
 *
 * <p>When no columns have been selected, every column of the input is used. The chosen
 * columns are validated with {@code TableUtil.assertNumericalCols} before any computation.
 *
 * <p>For {@code Method.PEARSON} the result is taken from
 * {@code StatisticsHelper.pearsonCorrelation} and serialized to rows through
 * {@code CorrelationDataConverter}. For any other method the data is first replaced by its
 * ranks ({@code SpearmanCorrelation.calcRank}); the ranks are then linked into a nested
 * {@code CorrelationBatchOp} whose data set and schema are reused as this operator's output.
 *
 * @param inputs upstream operators; the data is read from the first one
 * @return this operator
 */
@Override
public CorrelationBatchOp linkFrom(BatchOperator<?>... inputs) {
    BatchOperator<?> in = checkAndGetFirst(inputs);

    // Fall back to all input columns when the caller did not pick any.
    String[] requestedCols = this.getParams().get(SELECTED_COLS);
    String[] selectedColNames = (requestedCols != null) ? requestedCols : in.getColNames();

    // check col types must be double or bigint
    TableUtil.assertNumericalCols(in.getSchema(), selectedColNames);

    if (Method.PEARSON != getMethod()) {
        // Rank-based path: rank the selected columns, then correlate the ranks.
        DataSet<Row> rawRows = inputs[0].select(selectedColNames).getDataSet();
        DataSet<Row> rankedRows = SpearmanCorrelation.calcRank(rawRows, false);

        // Every rank column is declared as DOUBLE in the intermediate table.
        TypeInformation[] rankTypes = new TypeInformation[selectedColNames.length];
        java.util.Arrays.fill(rankTypes, Types.DOUBLE);

        BatchOperator rankSource = new TableSourceBatchOp(
            DataSetConversionUtil.toTable(getMLEnvironmentId(), rankedRows, selectedColNames, rankTypes))
            .setMLEnvironmentId(getMLEnvironmentId());
        CorrelationBatchOp rankCorrelation = new CorrelationBatchOp()
            .setMLEnvironmentId(getMLEnvironmentId())
            .setSelectedCols(selectedColNames);
        rankSource.link(rankCorrelation);

        this.setOutput(rankCorrelation.getDataSet(), rankCorrelation.getSchema());
        return this;
    }

    // Pearson path: only the CorrelationResult half (f1) of each tuple is written out.
    DataSet<Tuple2<TableSummary, CorrelationResult>> summaryAndCorr =
        StatisticsHelper.pearsonCorrelation(in, selectedColNames);
    DataSet<Row> modelRows = summaryAndCorr.flatMap(
        new FlatMapFunction<Tuple2<TableSummary, CorrelationResult>, Row>() {

            private static final long serialVersionUID = -4498296161046449646L;

            @Override
            public void flatMap(Tuple2<TableSummary, CorrelationResult> value, Collector<Row> out) {
                CorrelationDataConverter converter = new CorrelationDataConverter();
                converter.save(value.f1, out);
            }
        });
    this.setOutput(modelRows, new CorrelationDataConverter().getModelSchema());
    return this;
}
Also used : CorrelationDataConverter(com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationDataConverter) CorrelationResult(com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult) TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Row(org.apache.flink.types.Row) TableSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.TableSummary)

Example 2 with CorrelationDataConverter

use of com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationDataConverter in project Alink by alibaba.

The method linkFrom of the class VectorCorrelationBatchOp.

/**
 * Computes the correlation for the selected vector column of the first input and publishes
 * the result as this operator's output.
 *
 * <p>For {@code Method.PEARSON} the result is taken from
 * {@code StatisticsHelper.vectorPearsonCorrelation} and serialized to rows through
 * {@code CorrelationDataConverter}. For any other method the vector column is passed through
 * {@code StatisticsHelper.transformToColumns} and {@code SpearmanCorrelation.calcRank}; the
 * ranked rows are exposed as a single string column named {@code "col"} and linked into a
 * nested {@code VectorCorrelationBatchOp} whose data set and schema are reused as this
 * operator's output.
 *
 * @param inputs upstream operators; the data is read from the first one
 * @return this operator
 */
@Override
public VectorCorrelationBatchOp linkFrom(BatchOperator<?>... inputs) {
    BatchOperator<?> in = checkAndGetFirst(inputs);
    String vectorColName = getSelectedCol();

    if (Method.PEARSON != getMethod()) {
        // Rank-based path: rank the vector data, then correlate the ranks.
        DataSet<Row> columnRows = StatisticsHelper.transformToColumns(in, null, vectorColName, null);
        DataSet<Row> rankedRows = SpearmanCorrelation.calcRank(columnRows, true);

        // The ranked rows travel through the intermediate table as one string column.
        String rankColName = "col";
        String[] rankColNames = new String[] { rankColName };
        TypeInformation[] rankColTypes = new TypeInformation[] { Types.STRING };

        BatchOperator rankSource = new TableSourceBatchOp(
            DataSetConversionUtil.toTable(getMLEnvironmentId(), rankedRows, rankColNames, rankColTypes))
            .setMLEnvironmentId(getMLEnvironmentId());
        VectorCorrelationBatchOp rankCorrelation = new VectorCorrelationBatchOp()
            .setMLEnvironmentId(getMLEnvironmentId())
            .setSelectedCol(rankColName);
        rankSource.link(rankCorrelation);

        this.setOutput(rankCorrelation.getDataSet(), rankCorrelation.getSchema());
        return this;
    }

    // Pearson path: only the CorrelationResult half (f1) of each tuple is written out.
    DataSet<Tuple2<BaseVectorSummary, CorrelationResult>> summaryAndCorr =
        StatisticsHelper.vectorPearsonCorrelation(in, vectorColName);
    DataSet<Row> modelRows = summaryAndCorr.flatMap(
        new FlatMapFunction<Tuple2<BaseVectorSummary, CorrelationResult>, Row>() {

            private static final long serialVersionUID = 2134644397476490118L;

            @Override
            public void flatMap(Tuple2<BaseVectorSummary, CorrelationResult> value, Collector<Row> out)
                throws Exception {
                CorrelationDataConverter converter = new CorrelationDataConverter();
                converter.save(value.f1, out);
            }
        });
    this.setOutput(modelRows, new CorrelationDataConverter().getModelSchema());
    return this;
}
Also used : CorrelationDataConverter(com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationDataConverter) CorrelationResult(com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult) TableSourceBatchOp(com.alibaba.alink.operator.batch.source.TableSourceBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) Row(org.apache.flink.types.Row)

Aggregations

BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)2 TableSourceBatchOp (com.alibaba.alink.operator.batch.source.TableSourceBatchOp)2 CorrelationDataConverter (com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationDataConverter)2 CorrelationResult (com.alibaba.alink.operator.common.statistics.basicstatistic.CorrelationResult)2 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)2 Row (org.apache.flink.types.Row)2 BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary)1 TableSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.TableSummary)1 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)1