Search in sources:

Example 1 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class VectorImputerModelDataConverter method serializeModel.

/**
 * Serialize the model data to "Tuple3<Params, Iterable<String>, Iterable<Row>>".
 *
 * <p>The returned meta {@link Params} always holds the selected column and the strategy. For the
 * MIN, MAX and MEAN strategies the matching statistic of the summary is written out as one dense
 * {@code double[]} (one slot per vector dimension) encoded through {@code JsonConverter.toJson}.
 * For any other strategy the fill value is stored in the meta instead and the array passed to
 * {@code JsonConverter.toJson} is {@code null}.
 *
 * @param modelData The model data to serialize: (strategy, vector summary, fill value).
 * @return The serialization result; the third component is always an empty list.
 */
public Tuple3<Params, Iterable<String>, Iterable<Row>> serializeModel(Tuple3<Strategy, BaseVectorSummary, Double> modelData) {
    Strategy strategy = modelData.f0;
    BaseVectorSummary summary = modelData.f1;
    double[] values = null;
    Params meta = new Params().set(SELECTED_COL, vectorColName).set(STRATEGY, strategy);
    switch (strategy) {
        case MIN:
            if (summary.min() instanceof DenseVector) {
                values = ((DenseVector) summary.min()).getData();
            } else {
                values = ((SparseVector) summary.min()).toDenseVector().getData();
            }
            break;
        case MAX:
            if (summary.max() instanceof DenseVector) {
                values = ((DenseVector) summary.max()).getData();
            } else {
                values = ((SparseVector) summary.max()).toDenseVector().getData();
            }
            break;
        case MEAN:
            if (summary.mean() instanceof DenseVector) {
                values = ((DenseVector) summary.mean()).getData();
            } else {
                // Densify like MIN/MAX do: the raw sparse value array only holds the stored
                // entries, so its positions would not line up with the vector dimensions.
                values = ((SparseVector) summary.mean()).toDenseVector().getData();
            }
            break;
        default: {
            // The fill value is only needed here, so it is only unboxed here; the statistic-based
            // strategies above no longer fail when no fill value was supplied.
            double fillValue = modelData.f2;
            meta.set(FILL_VALUE, fillValue);
        }
    }
    List<String> data = new ArrayList<>();
    data.add(JsonConverter.toJson(values));
    return Tuple3.of(meta, data, new ArrayList<>());
}
Also used : BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) ArrayList(java.util.ArrayList) Strategy(com.alibaba.alink.params.dataproc.vector.VectorImputerTrainParams.Strategy) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector)

Example 2 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class VectorMaxAbsScalerModelDataConverter method serializeModel.

/**
 * Serialize the model data to "Tuple3<Params, Iterable<String>, Iterable<Row>>".
 *
 * <p>For every dimension the larger of {@code |min|} and {@code |max|} is computed from the
 * summary, and the resulting array is stored as a single JSON string. The meta {@link Params}
 * and the row list are both left empty.
 *
 * @param modelData The vector summary providing the per-dimension min and max.
 * @return The serialization result.
 */
public Tuple3<Params, Iterable<String>, Iterable<Row>> serializeModel(BaseVectorSummary modelData) {
    // Both statistics are brought into dense array form so they can be indexed by dimension.
    final double[] upper = modelData.max() instanceof DenseVector
        ? ((DenseVector) modelData.max()).getData()
        : ((SparseVector) modelData.max()).toDenseVector().getData();
    final double[] lower = modelData.min() instanceof DenseVector
        ? ((DenseVector) modelData.min()).getData()
        : ((SparseVector) modelData.min()).toDenseVector().getData();

    final double[] absBound = new double[upper.length];
    int dim = 0;
    while (dim < absBound.length) {
        absBound[dim] = Math.max(Math.abs(lower[dim]), Math.abs(upper[dim]));
        dim++;
    }

    List<String> payload = new ArrayList<>();
    payload.add(JsonConverter.toJson(absBound));
    return Tuple3.of(new Params(), payload, new ArrayList<>());
}
Also used : ArrayList(java.util.ArrayList) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector)

Example 3 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class ManHattanDistance method calc.

/**
 * Computes the distance between one vector and every column of a dense matrix, writing the
 * result for column {@code i} into {@code res[i]}.
 *
 * <p>The dense branch delegates each column to the array-based {@code calc} overload. The sparse
 * branch sums the absolute element-wise differences itself, treating every dimension that is not
 * stored in the sparse vector as zero. Both branches overwrite {@code res[i]}; previous contents
 * of {@code res} never influence the result.
 *
 * @param vector the single vector to compare.
 * @param matrix the matrix whose columns are the vectors to compare against; its data array is
 *               read column after column.
 * @param res    output array, one slot per matrix column.
 */
@Override
void calc(FastDistanceVectorData vector, FastDistanceMatrixData matrix, double[] res) {
    Vector vec = vector.getVector();
    if (vec instanceof DenseVector) {
        double[] vecData = ((DenseVector) vec).getData();
        double[] matrixData = matrix.getVectors().getData();
        int vectorSize = vecData.length;
        for (int i = 0; i < matrix.getVectors().numCols(); i++) {
            res[i] = calc(vecData, 0, matrixData, i * vectorSize, vectorSize);
        }
    } else {
        int[] indices = ((SparseVector) vec).getIndices();
        double[] values = ((SparseVector) vec).getValues();
        DenseMatrix denseMatrix = matrix.getVectors();
        double[] matrixData = denseMatrix.getData();
        int numRows = denseMatrix.numRows();
        int numCols = denseMatrix.numCols();
        // Running offset into matrixData; advances by one per (column, row) pair.
        int cnt = 0;
        for (int i = 0; i < numCols; i++) {
            // Cursor into the sparse entries; relies on indices being in ascending order.
            int p1 = 0;
            // Accumulate locally and assign once, so that stale values already present in res
            // are overwritten exactly as in the dense branch instead of being added to.
            double sum = 0.0;
            for (int j = 0; j < numRows; j++) {
                double cell = matrixData[cnt++];
                if (p1 < indices.length && indices[p1] == j) {
                    sum += Math.abs(values[p1] - cell);
                    p1++;
                } else {
                    sum += Math.abs(cell);
                }
            }
            res[i] = sum;
        }
    }
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector) Vector(com.alibaba.alink.common.linalg.Vector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) DenseMatrix(com.alibaba.alink.common.linalg.DenseMatrix)

Example 4 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class EuclideanDistance method calc.

/**
 * Fills {@code res} with the distances between one vector and the samples held in a sparse
 * data block.
 *
 * <p>First, for every component of {@code left}, the product of that component with each stored
 * value of the matching row of {@code right} is subtracted from the {@code res} slot named by the
 * row's index array. Afterwards each slot is replaced by
 * {@code sqrt(|leftLabel + rightLabel[i] + 2 * res[i]|)}.
 * NOTE(review): this presumably expands a squared Euclidean distance, with both labels holding
 * squared L2 norms - confirm against the code that prepares the labels.
 *
 * @param left  the single vector (dense or sparse) together with its label.
 * @param right the sparse data block: per-row index and value arrays plus a label vector.
 * @param res   output array; cleared on entry, then one distance per slot.
 */
@Override
void calc(FastDistanceVectorData left, FastDistanceSparseData right, double[] res) {
    Arrays.fill(res, 0.0);
    final int[][] slotsByRow = right.getIndices();
    final double[][] valuesByRow = right.getValues();

    if (!(left.vector instanceof DenseVector)) {
        SparseVector sparse = (SparseVector) left.getVector();
        int[] rows = sparse.getIndices();
        double[] weights = sparse.getValues();
        for (int k = 0; k < rows.length; k++) {
            int row = rows[k];
            int[] slots = slotsByRow[row];
            if (slots == null) {
                // Nothing is stored for this row.
                continue;
            }
            for (int s = 0; s < slots.length; s++) {
                res[slots[s]] -= valuesByRow[row][s] * weights[k];
            }
        }
    } else {
        double[] dense = ((DenseVector) left.vector).getData();
        for (int row = 0; row < dense.length; row++) {
            int[] slots = slotsByRow[row];
            if (slots == null) {
                // Nothing is stored for this row.
                continue;
            }
            for (int s = 0; s < slots.length; s++) {
                res[slots[s]] -= valuesByRow[row][s] * dense[row];
            }
        }
    }

    final double leftLabel = left.label.get(0);
    final double[] rightLabels = right.getLabel().getData();
    int slot = 0;
    while (slot < res.length) {
        res[slot] = Math.sqrt(Math.abs(leftLabel + rightLabels[slot] + 2 * res[slot]));
        slot++;
    }
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector)

Example 5 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class VectorWriter method write.

/**
 * Builds a single-field row holding a vector from a string-to-string map.
 *
 * <p>When no column names are configured, every map entry is read as {@code index -> value} and
 * a {@link SparseVector} of the configured size is built. Otherwise the values of the configured
 * columns are joined with spaces, in column order, and handed to {@code VectorUtil.getVector}.
 *
 * @param in the key/value pairs to convert.
 * @return {@code (true, row)} on success; {@code (false, empty row)} when a key or value is not
 *         a usable number, including values that pass the syntactic check but cannot be parsed.
 */
@Override
public Tuple2<Boolean, Row> write(Map<String, String> in) {
    if (null == this.colNames) {
        int itemSize = in.size();
        int[] indices = new int[itemSize];
        double[] values = new double[itemSize];
        int count = 0;
        for (Map.Entry<String, String> entry : in.entrySet()) {
            String key = entry.getKey();
            String value = entry.getValue();
            if (!NumberUtils.isDigits(key) || !NumberUtils.isNumber(value)) {
                return Tuple2.of(false, new Row(0));
            }
            try {
                indices[count] = Integer.parseInt(key);
                values[count] = Double.parseDouble(value);
            } catch (NumberFormatException e) {
                // The syntactic checks above are wider than the parsers: an all-digit key can
                // overflow int, and isNumber admits forms (e.g. hex or type-suffixed literals)
                // that Double.parseDouble rejects. Report these as invalid input like every
                // other malformed entry instead of letting the exception escape.
                return Tuple2.of(false, new Row(0));
            }
            count++;
        }
        return Tuple2.of(true, Row.of(new SparseVector((int) this.size, indices, values)));
    } else {
        StringBuilder sbd = new StringBuilder();
        int n = colNames.length;
        if (this.size > colNames.length) {
            // More dimensions requested than columns available: emit the size as a "$size$"
            // prefix. NOTE(review): presumably VectorUtil.getVector reads this as the vector
            // size header - confirm.
            sbd.append("$").append(this.size).append("$");
        } else if (this.size > 0 && this.size < colNames.length) {
            // Fewer dimensions requested than columns: only the first `size` columns are used.
            n = (int) this.size;
        }
        for (int i = 0; i < n; i++) {
            if (i > 0) {
                sbd.append(" ");
            }
            String v = in.get(colNames[i]);
            if (!NumberUtils.isNumber(v)) {
                return Tuple2.of(false, new Row(0));
            }
            sbd.append(v);
        }
        return Tuple2.of(true, Row.of(VectorUtil.getVector(sbd.toString())));
    }
}
Also used : Tuple2(org.apache.flink.api.java.tuple.Tuple2) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Map(java.util.Map)

Aggregations

SparseVector (com.alibaba.alink.common.linalg.SparseVector): 125; Test (org.junit.Test): 63; DenseVector (com.alibaba.alink.common.linalg.DenseVector): 60; Params (org.apache.flink.ml.api.misc.param.Params): 45; Row (org.apache.flink.types.Row): 45; Vector (com.alibaba.alink.common.linalg.Vector): 40; TableSchema (org.apache.flink.table.api.TableSchema): 27; ArrayList (java.util.ArrayList): 21; TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 15; HashMap (java.util.HashMap): 12; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 12; List (java.util.List): 11; DenseMatrix (com.alibaba.alink.common.linalg.DenseMatrix): 10; MTable (com.alibaba.alink.common.MTable): 7; BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary): 6; CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp): 6; Map (java.util.Map): 6; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 5; VectorAssemblerParams (com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams): 5; OneHotPredictParams (com.alibaba.alink.params.feature.OneHotPredictParams): 5