Search in sources :

Example 91 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class OnlineLogLikelihood method logLikelihood.

/**
 * Computes the log likelihood from a per-document (corpus) term and a topic term.
 *
 * <p>The corpus term is accumulated over every vector in {@code data}; the topic term is
 * derived from {@code lambda}, {@code beta} and {@code vocabularySize} and is divided by
 * {@code taskNum} before being added to the corpus term.
 *
 * @param data           documents; each element is cast to {@code SparseVector} and has its zero
 *                       values removed in place. May be {@code null}, in which case the corpus
 *                       term is 0.
 * @param lambda         matrix used for the Dirichlet expectations and for the topic term.
 * @param alpha          matrix passed to the topic-distribution step and used in the
 *                       per-document bound.
 * @param gammad         starting gamma; when {@code null}, a new gamma is generated for every
 *                       document with {@code LdaUtil.geneGamma}. Otherwise the gamma produced
 *                       for one document is the starting point for the next one.
 * @param numTopic       number of topics.
 * @param vocabularySize vocabulary size, multiplied by {@code beta} for the topic term.
 * @param beta           scalar used in the topic term.
 * @param taskNum        divisor applied to the topic term.
 * @param gammaShape     shape argument forwarded to {@code LdaUtil.geneGamma}.
 * @param random         random generator forwarded to {@code LdaUtil.geneGamma}.
 * @return corpus term plus topic term divided by {@code taskNum}.
 */
public static double logLikelihood(List<Vector> data, DenseMatrix lambda, DenseMatrix alpha, DenseMatrix gammad, int numTopic, int vocabularySize, double beta, int taskNum, int gammaShape, RandomDataGenerator random) {
    final boolean drawGammaPerDoc = (gammad == null);
    DenseMatrix ELogBeta = LdaUtil.dirichletExpectation(lambda).transpose();
    DenseMatrix expELogBeta = LdaUtil.expDirichletExpectation(lambda).transpose();

    // corpus part: E[log p(theta | alpha) - log q(theta | Gamma)]
    double corpusPart = 0;
    if (data != null) {
        // Carried across documents, exactly like the reassigned parameter in the original form.
        DenseMatrix currentGamma = gammad;
        for (Vector doc : data) {
            SparseVector sparseDoc = (SparseVector) doc;
            sparseDoc.removeZeroValues();
            if (drawGammaPerDoc) {
                currentGamma = LdaUtil.geneGamma(numTopic, gammaShape, random);
            }
            currentGamma = LdaUtil.getTopicDistributionMethod(sparseDoc, expELogBeta, alpha, currentGamma, numTopic).f0;
            DenseMatrix ELogThetad = LdaUtil.dirichletExpectationVec(currentGamma);

            // Word contribution: value-weighted log-sum-exp of (E[log theta] + E[log beta_w]).
            double docBound = 0;
            final int storedCount = sparseDoc.numberOfValues();
            final int[] wordIds = sparseDoc.getIndices();
            final double[] wordCounts = sparseDoc.getValues();
            for (int k = 0; k < storedCount; k++) {
                DenseMatrix ELogBetaDoc = new DenseMatrix(numTopic, 1, ELogBeta.getRow(wordIds[k]));
                docBound += wordCounts[k] * LdaUtil.logSumExp(ELogThetad.plus(ELogBetaDoc));
            }

            // Remaining per-document terms built from alpha and the document's gamma.
            docBound += LdaUtil.elementWiseProduct(alpha.minus(currentGamma), ELogThetad).sum();
            docBound += LdaUtil.logGamma(currentGamma).minus(LdaUtil.logGamma(alpha)).sum();
            docBound += LdaUtil.logGamma(alpha.sum()) - LdaUtil.logGamma(currentGamma.sum());
            corpusPart += docBound;
        }
    }

    // Topic part, split into its three summands (evaluated in the same order as a single expression).
    final double sumEta = beta * vocabularySize;
    final double priorMinusLambdaTerm = LdaUtil.elementWiseProduct(lambda.transpose().plus(-beta).scale(-1), ELogBeta).sum();
    final double logGammaLambdaTerm = LdaUtil.logGamma(lambda.transpose()).plus(-LdaUtil.logGamma(beta)).sum();
    final double logGammaRowSumTerm = LdaUtil.logGamma(LdaUtil.sumByRow(lambda.transpose())).plus(-LdaUtil.logGamma(sumEta)).sum();
    final double topicsPart = priorMinusLambdaTerm + logGammaLambdaTerm - logGammaRowSumTerm;

    return corpusPart + topicsPart / taskNum;
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector) Vector(com.alibaba.alink.common.linalg.Vector) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseMatrix(com.alibaba.alink.common.linalg.DenseMatrix)

Example 92 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class VectorFunctionMapper method procMaxMin.

/**
 * Computes the maximum or minimum element of a vector, or the position of that element.
 *
 * <p>The operation is selected by the {@code funcName} field: {@code Max}/{@code ArgMax} search
 * for the largest element, {@code Min}/{@code ArgMin} for the smallest. Comparisons are strict,
 * so among equal stored elements the first one encountered wins.
 *
 * <p>For a {@code SparseVector} whose {@code size()} is larger than its number of stored
 * values, the positions that are not stored are treated as 0.0: if 0.0 is strictly better than
 * every stored value (or nothing is stored), the result is 0.0 at the smallest position that
 * is not stored. When the size does not exceed the stored count (including a non-positive
 * size), only the stored values are considered.
 *
 * @param input value converted to a vector through {@code VectorUtil.getVector}.
 * @return the converted vector itself (possibly {@code null}) when it is {@code null} or has
 *         size 0; otherwise a {@code String} holding the index (ArgMax/ArgMin) or the value
 *         (Max/Min).
 * @throws RuntimeException if {@code funcName} is none of Max, Min, ArgMax, ArgMin.
 */
protected Object procMaxMin(Object input) {
    Vector vectorInput = VectorUtil.getVector(input);
    if (vectorInput == null || vectorInput.size() == 0) {
        return vectorInput;
    }
    boolean equalMin = funcName.equals(HasFuncName.FuncName.ArgMin) || funcName.equals(HasFuncName.FuncName.Min);
    boolean equalMax = funcName.equals(HasFuncName.FuncName.ArgMax) || funcName.equals(HasFuncName.FuncName.Max);

    int dstIdx = 0;
    double dstVal;
    if (vectorInput instanceof SparseVector) {
        SparseVector sv = (SparseVector) vectorInput;
        if (!equalMax && !equalMin) {
            throw new RuntimeException("Not implemented yet!");
        }
        int[] indices = sv.getIndices();
        double[] values = sv.getValues();
        int stored = sv.numberOfValues();

        // Pass 1: best among the explicitly stored entries.
        dstVal = equalMax ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
        for (int i = 0; i < stored; ++i) {
            double candidate = values[i];
            if (equalMax ? dstVal < candidate : dstVal > candidate) {
                dstVal = candidate;
                dstIdx = indices[i];
            }
        }

        // Pass 2: positions that are not stored are zeros; let a zero win only when it is
        // strictly better than the best stored value, matching the strict comparisons above.
        boolean zeroIsBetter = equalMax ? dstVal < 0.0 : dstVal > 0.0;
        int size = sv.size();
        if (zeroIsBetter && size > stored) {
            int implicitIdx = firstImplicitIndex(indices, stored, size);
            if (implicitIdx >= 0) {
                dstVal = 0.0;
                dstIdx = implicitIdx;
            }
        }
    } else {
        DenseVector dv = (DenseVector) vectorInput;
        if (!equalMax && !equalMin) {
            throw new RuntimeException("Not implemented yet!");
        }
        dstVal = equalMax ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
        for (int i = 0; i < dv.size(); ++i) {
            double candidate = dv.get(i);
            if (equalMax ? dstVal < candidate : dstVal > candidate) {
                dstVal = candidate;
                dstIdx = i;
            }
        }
    }

    if (funcName.equals(FuncName.ArgMax) || funcName.equals(FuncName.ArgMin)) {
        return String.valueOf(dstIdx);
    } else {
        return String.valueOf(dstVal);
    }
}

/**
 * Finds the smallest position in {@code [0, size)} that is absent from the first
 * {@code stored} entries of {@code indices}.
 *
 * <p>The indices are sorted on a copy, so no particular ordering of the caller's array is
 * assumed and the array is not modified.
 *
 * @return the smallest absent position, or -1 if every position in {@code [0, size)} is stored.
 */
private static int firstImplicitIndex(int[] indices, int stored, int size) {
    int[] sorted = java.util.Arrays.copyOf(indices, stored);
    java.util.Arrays.sort(sorted);
    int candidate = 0;
    for (int idx : sorted) {
        if (idx > candidate) {
            // Gap found: candidate is not stored.
            break;
        }
        if (idx == candidate) {
            candidate++;
        }
        // idx < candidate (duplicate or negative index) is skipped.
    }
    return candidate < size ? candidate : -1;
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector) Vector(com.alibaba.alink.common.linalg.Vector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector)

Example 93 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class VectorInteractionMapper method map.

/**
 * Builds the interaction vector (all pairwise products) of two input vectors.
 *
 * <p>Both inputs must be of the same kind:
 * <ul>
 *   <li>Sparse x sparse: the result is a {@code SparseVector} of size
 *       {@code size1 * size2}; the product of stored entry {@code i} of the first vector and
 *       stored entry {@code j} of the second is placed at index
 *       {@code size1 * index2[j] + index1[i]}.</li>
 *   <li>Dense x dense: the result is a {@code DenseVector} of length
 *       {@code len1 * len2}; the product of element {@code i} of the first vector and element
 *       {@code j} of the second is placed at index {@code i * len2 + j}.</li>
 * </ul>
 *
 * @param input exactly two values, each converted through {@code VectorUtil.getVector}.
 * @return the interaction vector, or {@code null} if either input value is {@code null}.
 * @throws IllegalArgumentException if the number of inputs is not two, or if one vector is
 *                                  sparse and the other dense.
 */
@Override
protected Object map(Object[] input) {
    if (input.length != 2) {
        throw new IllegalArgumentException("VectorInteraction only support two input columns.");
    }
    if (input[0] == null || input[1] == null) {
        return null;
    }

    Vector first = VectorUtil.getVector(input[0]);
    Vector second = VectorUtil.getVector(input[1]);

    final boolean firstIsSparse = first instanceof SparseVector;
    final boolean kindMismatch = firstIsSparse ? second instanceof DenseVector : second instanceof SparseVector;
    if (kindMismatch) {
        throw new IllegalArgumentException("Make sure the two input vectors are both dense or sparse.");
    }

    if (!firstIsSparse) {
        double[] left = ((DenseVector) first).getData();
        double[] right = ((DenseVector) second).getData();
        DenseVector product = new DenseVector(left.length * right.length);
        double[] out = product.getData();
        // Row-major fill: the cursor always equals i * right.length + j.
        int cursor = 0;
        for (double l : left) {
            for (double r : right) {
                out[cursor++] = l * r;
            }
        }
        return product;
    }

    SparseVector left = (SparseVector) first;
    int leftSize = left.size();
    int[] leftIdx = left.getIndices();
    double[] leftVal = left.getValues();

    SparseVector right = (SparseVector) second;
    int rightSize = right.size();
    int[] rightIdx = right.getIndices();
    double[] rightVal = right.getValues();

    final int pairCount = rightIdx.length * leftIdx.length;
    double[] outValues = new double[pairCount];
    int[] outIndices = new int[pairCount];

    // Pairs are written in (i, j) order; the cursor always equals i * rightIdx.length + j.
    int cursor = 0;
    for (int i = 0; i < leftIdx.length; ++i) {
        for (int j = 0; j < rightIdx.length; ++j) {
            outIndices[cursor] = leftSize * rightIdx[j] + leftIdx[i];
            outValues[cursor] = leftVal[i] * rightVal[j];
            ++cursor;
        }
    }
    return new SparseVector(leftSize * rightSize, outIndices, outValues);
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector) Vector(com.alibaba.alink.common.linalg.Vector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector)

Example 94 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class VectorMinMaxScalerModelDataConverter method serializeModel.

/**
 * Serializes the min-max scaler model into a (meta, string data, rows) triple.
 *
 * <p>The meta {@code Params} hold the MIN and MAX values taken from {@code modelData} plus the
 * selected vector column name. The string data holds two JSON arrays, in this order: the
 * minimum vector and the maximum vector reported by the summary, each converted to a dense
 * {@code double[]}. The row part is an empty list.
 *
 * @param modelData f0 and f1 are stored as MIN and MAX in the meta; f2 is the summary that
 *                  supplies the {@code min()} and {@code max()} vectors.
 * @return The serialization result.
 */
public Tuple3<Params, Iterable<String>, Iterable<Row>> serializeModel(Tuple3<Double, Double, BaseVectorSummary> modelData) {
    double targetMin = modelData.f0;
    double targetMax = modelData.f1;
    BaseVectorSummary stats = modelData.f2;

    // A sparse summary vector is densified so that the payload is always a plain array.
    double[] eMins = stats.min() instanceof DenseVector
        ? ((DenseVector) stats.min()).getData()
        : ((SparseVector) stats.min()).toDenseVector().getData();
    double[] eMaxs = stats.max() instanceof DenseVector
        ? ((DenseVector) stats.max()).getData()
        : ((SparseVector) stats.max()).toDenseVector().getData();

    List<String> payload = new ArrayList<>();
    payload.add(JsonConverter.toJson(eMins));
    payload.add(JsonConverter.toJson(eMaxs));

    Params meta = new Params()
        .set(VectorMinMaxScalerTrainParams.MIN, targetMin)
        .set(VectorMinMaxScalerTrainParams.MAX, targetMax)
        .set(VectorMinMaxScalerTrainParams.SELECTED_COL, vectorColName);
    return Tuple3.of(meta, payload, new ArrayList<>());
}
Also used : BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) ArrayList(java.util.ArrayList) VectorMinMaxScalerTrainParams(com.alibaba.alink.params.dataproc.vector.VectorMinMaxScalerTrainParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector)

Example 95 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class VectorStandardScalerModelDataConverter method serializeModel.

/**
 * Serializes the standard scaler model into a (meta, string data, rows) triple.
 *
 * <p>The meta {@code Params} hold the WITH_MEAN and WITH_STD flags plus the selected vector
 * column name. The string data holds two JSON arrays, in this order: the means and the
 * standard deviations. When {@code withMean} is false the means are all 0; when
 * {@code withStd} is false the standard deviations are all 1. Otherwise the values come from
 * the summary, converted to a dense {@code double[]}. The row part is an empty list.
 *
 * @param modelData f0 = withMean flag, f1 = withStd flag, f2 = summary supplying
 *                  {@code vectorSize()}, {@code mean()} and {@code standardDeviation()}.
 * @return The serialization result.
 */
public Tuple3<Params, Iterable<String>, Iterable<Row>> serializeModel(Tuple3<Boolean, Boolean, BaseVectorSummary> modelData) {
    Boolean withMean = modelData.f0;
    Boolean withStd = modelData.f1;
    BaseVectorSummary stats = modelData.f2;
    int dim = stats.vectorSize();

    double[] means;
    if (!withMean) {
        // Centering disabled: a zero-filled array of the vector size.
        means = new double[dim];
    } else if (stats.mean() instanceof DenseVector) {
        means = ((DenseVector) stats.mean()).getData();
    } else {
        means = ((SparseVector) stats.mean()).toDenseVector().getData();
    }

    double[] stdDeviations;
    if (!withStd) {
        // Scaling disabled: every standard deviation is set to 1.
        stdDeviations = new double[dim];
        Arrays.fill(stdDeviations, 1);
    } else if (stats.standardDeviation() instanceof DenseVector) {
        stdDeviations = ((DenseVector) stats.standardDeviation()).getData();
    } else {
        stdDeviations = ((SparseVector) stats.standardDeviation()).toDenseVector().getData();
    }

    List<String> payload = new ArrayList<>();
    payload.add(JsonConverter.toJson(means));
    payload.add(JsonConverter.toJson(stdDeviations));

    Params meta = new Params()
        .set(VectorStandardTrainParams.WITH_MEAN, withMean)
        .set(VectorStandardTrainParams.WITH_STD, withStd)
        .set(VectorMinMaxScalerTrainParams.SELECTED_COL, vectorColName);
    return Tuple3.of(meta, payload, new ArrayList<>());
}
Also used : BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) ArrayList(java.util.ArrayList) VectorMinMaxScalerTrainParams(com.alibaba.alink.params.dataproc.vector.VectorMinMaxScalerTrainParams) VectorStandardTrainParams(com.alibaba.alink.params.dataproc.vector.VectorStandardTrainParams) Params(org.apache.flink.ml.api.misc.param.Params) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector)

Aggregations

SparseVector (com.alibaba.alink.common.linalg.SparseVector)125 Test (org.junit.Test)63 DenseVector (com.alibaba.alink.common.linalg.DenseVector)60 Params (org.apache.flink.ml.api.misc.param.Params)45 Row (org.apache.flink.types.Row)45 Vector (com.alibaba.alink.common.linalg.Vector)40 TableSchema (org.apache.flink.table.api.TableSchema)27 ArrayList (java.util.ArrayList)21 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)15 HashMap (java.util.HashMap)12 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)12 List (java.util.List)11 DenseMatrix (com.alibaba.alink.common.linalg.DenseMatrix)10 MTable (com.alibaba.alink.common.MTable)7 BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary)6 CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp)6 Map (java.util.Map)6 MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp)5 VectorAssemblerParams (com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams)5 OneHotPredictParams (com.alibaba.alink.params.feature.OneHotPredictParams)5