Search in sources :

Example 36 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class FastDistanceTest method initSparseData.

/**
 * Builds {@code len} sparse vectors of dimension {@code size}.
 *
 * <p>The i-th vector stores two random values at indices {@code i % size} and
 * {@code (i + 1) % size}.
 *
 * @param len  number of vectors to generate
 * @param size dimension passed to every {@link SparseVector}; also the modulus for the indices
 * @return the generated vectors, in creation order
 */
private static List<Vector> initSparseData(int len, int size) {
    Random rng = new Random();
    List<Vector> vectors = new ArrayList<>();
    int produced = 0;
    while (produced < len) {
        int[] positions = { produced % size, (produced + 1) % size };
        // Draw order matters for reproducibility with a seeded generator: first slot, then second.
        double first = rng.nextDouble();
        double second = rng.nextDouble();
        vectors.add(new SparseVector(size, positions, new double[] { first, second }));
        produced++;
    }
    return vectors;
}
Also used : Random(java.util.Random) ArrayList(java.util.ArrayList) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Vector(com.alibaba.alink.common.linalg.Vector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) SparseVector(com.alibaba.alink.common.linalg.SparseVector)

Example 37 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class FastDistanceTest method initSparseDataWithoutSize.

/**
 * Builds {@code len} sparse vectors that are constructed with {@code -1} as their size argument.
 *
 * <p>The i-th vector stores two random values at indices {@code i % size} and
 * {@code (i + 1) % size}; {@code size} is only used as the modulus for those indices.
 *
 * @param len  number of vectors to generate
 * @param size modulus used to derive the two stored indices
 * @return the generated vectors, in creation order
 */
private static List<Vector> initSparseDataWithoutSize(int len, int size) {
    Random rng = new Random();
    List<Vector> vectors = new ArrayList<>();
    int produced = 0;
    while (produced < len) {
        int[] positions = { produced % size, (produced + 1) % size };
        // Draw order is kept: first slot, then second.
        double first = rng.nextDouble();
        double second = rng.nextDouble();
        // NOTE(review): -1 presumably means "size undetermined" for SparseVector; confirm.
        vectors.add(new SparseVector(-1, positions, new double[] { first, second }));
        produced++;
    }
    return vectors;
}
Also used : Random(java.util.Random) ArrayList(java.util.ArrayList) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Vector(com.alibaba.alink.common.linalg.Vector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) SparseVector(com.alibaba.alink.common.linalg.SparseVector)

Example 38 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class FastDistanceTest method testSparseVectorTupleInput.

/**
 * Prepares vector data from a (sparse vector, row) tuple with {@link EuclideanDistance} and
 * passes the result to {@code assertVectorInput} together with the source vector and an
 * equal, separately built row.
 */
@Test
public void testSparseVectorTupleInput() {
    int[] indices = { 1, 2 };
    double[] values = { 1.0, 1.0 };
    Vector sparse = new SparseVector(10, indices, values);
    EuclideanDistance euclidean = new EuclideanDistance();
    Row inputRow = Row.of(0, "a");
    FastDistanceVectorData prepared = euclidean.prepareVectorData(Tuple2.of(sparse, inputRow));
    // A fresh Row instance is used as the expectation so the check is not an identity comparison.
    Row expectedRow = Row.of(0, "a");
    assertVectorInput(prepared, sparse, expectedRow);
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector) Vector(com.alibaba.alink.common.linalg.Vector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Example 39 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class SparseVectorSummarizer method visit.

/**
 * Updates the summarizer with one vector.
 *
 * <p>A {@link DenseVector} is wrapped as a {@link SparseVector} whose indices are
 * {@code 0..size-1}, so dense and sparse inputs share the same accumulation path. The call
 * increments {@code count}, raises {@code colNum} to at least the vector's size, feeds every
 * stored (index, value) pair into the per-column statistics in {@code cols}, and, when
 * {@code calculateOuterProduct} is set, adds the vector's outer product into
 * {@code outerProduct}, growing that matrix if needed.
 *
 * @param vec the vector to accumulate; anything that is not a {@link DenseVector} is cast to
 *            {@link SparseVector}
 * @return this summarizer
 */
@Override
public BaseVectorSummarizer visit(Vector vec) {
    SparseVector sv;
    if (vec instanceof DenseVector) {
        DenseVector dv = (DenseVector) vec;
        int[] denseIndices = new int[dv.size()];
        for (int i = 0; i < denseIndices.length; i++) {
            denseIndices[i] = i;
        }
        sv = new SparseVector(dv.size(), denseIndices, dv.getData());
    } else {
        sv = (SparseVector) vec;
    }
    count++;
    this.colNum = Math.max(this.colNum, sv.size());
    if (sv.numberOfValues() == 0) {
        // Nothing stored: only count and colNum are affected.
        return this;
    }

    // NOTE(review): VectorIterator is used cursor-style here (hasNext() guards the current
    // entry, next() advances after it has been read); confirm against its contract.
    VectorIterator iter = sv.iterator();
    while (iter.hasNext()) {
        int index = iter.getIndex();
        double value = iter.getValue();
        if (cols.containsKey(index)) {
            cols.get(index).visit(value);
        } else {
            VectorStatCol statCol = new VectorStatCol();
            statCol.visit(value);
            cols.put(index, statCol);
        }
        iter.next();
    }

    if (calculateOuterProduct) {
        // Fetch the backing arrays once instead of on every iteration of the nested loop.
        final int[] indices = sv.getIndices();
        final double[] values = sv.getValues();
        // max index + 1 for size (assumes the indices are sorted ascending, TODO confirm).
        int size = indices[indices.length - 1] + 1;
        if (outerProduct == null) {
            outerProduct = DenseMatrix.zeros(size, size);
        } else if (size > outerProduct.numRows()) {
            // Grow: start from a larger zero matrix and carry the accumulated values over.
            outerProduct = VectorSummarizerUtil.plusEqual(DenseMatrix.zeros(size, size), outerProduct);
        }
        for (int i = 0; i < indices.length; i++) {
            double rowVal = values[i];
            int rowIdx = indices[i];
            for (int j = 0; j < indices.length; j++) {
                outerProduct.add(rowIdx, indices[j], rowVal * values[j]);
            }
        }
    }
    return this;
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) VectorIterator(com.alibaba.alink.common.linalg.VectorIterator) DenseMatrix(com.alibaba.alink.common.linalg.DenseMatrix)

Example 40 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class MultilayerPerceptronTrainBatchOp method getTrainingSamples.

/**
 * Converts the input rows into (label index, scaled dense feature vector) training samples.
 *
 * <p>The label of each row is translated to its index through the broadcast "labels" set, and
 * every feature is divided by the matching entry of the broadcast "maxAbs" vector (entries
 * equal to zero are replaced by 1.0 first). Features come either from a single vector column
 * or, when {@code vectorColName} is blank, from the numeric columns in {@code featureColNames}.
 *
 * @param data            input operator providing the rows
 * @param labels          (index, label) pairs, broadcast as "labels"
 * @param maxAbs          single-element data set holding the scaling vector, broadcast as "maxAbs"
 * @param featureColNames feature column names, used when no vector column is given
 * @param vectorColName   name of the vector column; blank or null selects the feature columns
 * @param labelColName    name of the label column
 * @param vecSize         size assigned to sparse input vectors before densifying them
 * @return the training samples; the vector is null for rows whose vector column yields no vector
 */
private static DataSet<Tuple2<Double, DenseVector>> getTrainingSamples(BatchOperator data, DataSet<Tuple2<Long, Object>> labels, DataSet<DenseVector> maxAbs, final String[] featureColNames, final String vectorColName, final String labelColName, final int vecSize) {
    final boolean isVectorInput = !StringUtils.isNullOrWhitespaceOnly(vectorColName);
    final int vectorColIdx;
    final int[] featureColIdx;
    if (isVectorInput) {
        vectorColIdx = TableUtil.findColIndexWithAssertAndHint(data.getColNames(), vectorColName);
        featureColIdx = null;
    } else {
        vectorColIdx = -1;
        featureColIdx = TableUtil.findColIndicesWithAssertAndHint(data.getSchema(), featureColNames);
    }
    final int labelColIdx = TableUtil.findColIndexWithAssertAndHint(data.getColNames(), labelColName);
    DataSet<Row> dataRows = data.getDataSet();
    return dataRows.map(new RichMapFunction<Row, Tuple2<Double, DenseVector>>() {

        private static final long serialVersionUID = -2883936655064900395L;

        transient Map<Comparable, Long> label2index;

        private DenseVector maxAbs;

        @Override
        public void open(Configuration parameters) throws Exception {
            // Invert the broadcast (index, label) pairs into a label -> index lookup.
            List<Tuple2<Long, Object>> bcLabels = getRuntimeContext().getBroadcastVariable("labels");
            this.label2index = new HashMap<>();
            for (Tuple2<Long, Object> entry : bcLabels) {
                this.label2index.put((Comparable) entry.f1, entry.f0);
            }
            maxAbs = (DenseVector) getRuntimeContext().getBroadcastVariable("maxAbs").get(0);
            // Zero scale factors become 1.0 so the divisions in map() never divide by zero.
            int dim = maxAbs.size();
            for (int pos = 0; pos < dim; ++pos) {
                if (maxAbs.get(pos) == 0) {
                    maxAbs.set(pos, 1.0);
                }
            }
        }

        @Override
        public Tuple2<Double, DenseVector> map(Row value) throws Exception {
            Comparable label = (Comparable) value.getField(labelColIdx);
            Long labelIdx = this.label2index.get(label);
            if (labelIdx == null) {
                throw new RuntimeException("unknown label: " + label);
            }

            if (!isVectorInput) {
                // Column input: assemble the features from the selected numeric columns.
                int featureCount = featureColIdx.length;
                DenseVector features = new DenseVector(featureCount);
                for (int col = 0; col < featureCount; col++) {
                    Number raw = (Number) value.getField(featureColIdx[col]);
                    features.set(col, raw.doubleValue() / maxAbs.get(col));
                }
                return Tuple2.of(labelIdx.doubleValue(), features);
            }

            Vector vec = VectorUtil.getVector(value.getField(vectorColIdx));
            if (null == vec) {
                return new Tuple2<>(labelIdx.doubleValue(), null);
            }

            DenseVector scaled;
            if (vec instanceof DenseVector) {
                // Dense input is scaled in place over the first maxAbs.size() entries.
                scaled = (DenseVector) vec;
                for (int pos = 0; pos < maxAbs.size(); ++pos) {
                    scaled.set(pos, scaled.get(pos) / maxAbs.get(pos));
                }
            } else {
                // Sparse input: fix the size, densify, then scale only the stored positions.
                SparseVector sparse = (SparseVector) vec;
                sparse.setSize(vecSize);
                scaled = sparse.toDenseVector();
                for (int stored : sparse.getIndices()) {
                    scaled.set(stored, scaled.get(stored) / maxAbs.get(stored));
                }
            }
            return new Tuple2<>(labelIdx.doubleValue(), scaled);
        }
    }).withBroadcastSet(labels, "labels").withBroadcastSet(maxAbs, "maxAbs");
}
Also used : RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) Arrays(java.util.Arrays) TableUtil(com.alibaba.alink.common.utils.TableUtil) Topology(com.alibaba.alink.operator.common.classification.ann.Topology) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) FeedForwardTopology(com.alibaba.alink.operator.common.classification.ann.FeedForwardTopology) HashMap(java.util.HashMap) MapFunction(org.apache.flink.api.common.functions.MapFunction) ArrayList(java.util.ArrayList) DataSet(org.apache.flink.api.java.DataSet) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) ModelParamName(com.alibaba.alink.common.model.ModelParamName) Map(java.util.Map) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) MlpcModelDataConverter(com.alibaba.alink.operator.common.classification.ann.MlpcModelDataConverter) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Vector(com.alibaba.alink.common.linalg.Vector) DataSetUtils(org.apache.flink.api.java.utils.DataSetUtils) FeedForwardTrainer(com.alibaba.alink.operator.common.classification.ann.FeedForwardTrainer) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) Configuration(org.apache.flink.configuration.Configuration) StringUtils(org.apache.flink.util.StringUtils) List(java.util.List) DenseVector(com.alibaba.alink.common.linalg.DenseVector) MultilayerPerceptronTrainParams(com.alibaba.alink.params.classification.MultilayerPerceptronTrainParams) MapPartitionFunction(org.apache.flink.api.common.functions.MapPartitionFunction) VectorUtil(com.alibaba.alink.common.linalg.VectorUtil) Row(org.apache.flink.types.Row) MlpcModelData(com.alibaba.alink.operator.common.classification.ann.MlpcModelData) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Params(org.apache.flink.ml.api.misc.param.Params) 
Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ArrayList(java.util.ArrayList) List(java.util.List) Row(org.apache.flink.types.Row) Vector(com.alibaba.alink.common.linalg.Vector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector)

Aggregations

SparseVector (com.alibaba.alink.common.linalg.SparseVector)125 Test (org.junit.Test)63 DenseVector (com.alibaba.alink.common.linalg.DenseVector)60 Params (org.apache.flink.ml.api.misc.param.Params)45 Row (org.apache.flink.types.Row)45 Vector (com.alibaba.alink.common.linalg.Vector)40 TableSchema (org.apache.flink.table.api.TableSchema)27 ArrayList (java.util.ArrayList)21 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)15 HashMap (java.util.HashMap)12 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)12 List (java.util.List)11 DenseMatrix (com.alibaba.alink.common.linalg.DenseMatrix)10 MTable (com.alibaba.alink.common.MTable)7 BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary)6 CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp)6 Map (java.util.Map)6 MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp)5 VectorAssemblerParams (com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams)5 OneHotPredictParams (com.alibaba.alink.params.feature.OneHotPredictParams)5