use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class OnlineLogLikelihood method logLikelihood.
/**
 * Calculate the log likelihood.
 *
 * <p>The result is the sum of a per-document ("corpus") part, accumulated over {@code data}, and a
 * topic part computed from {@code lambda} and {@code beta} that is divided by {@code taskNum}.
 *
 * <p>Side effect: every vector in {@code data} is cast to {@link SparseVector} and has
 * {@code removeZeroValues()} called on it.
 *
 * @param data           documents; may be null, in which case the corpus part is 0.
 * @param lambda         matrix handed to the dirichlet-expectation helpers and used in the topic part.
 * @param alpha          matrix used in the per-document terms.
 * @param gammad         initial gamma; when null a fresh gamma is generated for every document,
 *                       otherwise the gamma produced for one document seeds the next one.
 * @param numTopic       number of topics.
 * @param vocabularySize vocabulary size; multiplied with {@code beta} in the topic part.
 * @param beta           scalar used in the topic part.
 * @param taskNum        divisor applied to the topic part.
 * @param gammaShape     forwarded to {@code LdaUtil.geneGamma}.
 * @param random         forwarded to {@code LdaUtil.geneGamma}.
 * @return corpus part plus topic part divided by {@code taskNum}.
 */
public static double logLikelihood(List<Vector> data, DenseMatrix lambda, DenseMatrix alpha, DenseMatrix gammad,
                                   int numTopic, int vocabularySize, double beta, int taskNum, int gammaShape,
                                   RandomDataGenerator random) {
    final boolean drawGammaPerDoc = gammad == null;
    final DenseMatrix eLogBeta = LdaUtil.dirichletExpectation(lambda).transpose();
    final DenseMatrix expELogBeta = LdaUtil.expDirichletExpectation(lambda).transpose();

    // corpus part: E[log p(theta | alpha) - log q(theta | Gamma)]
    double corpusPart = 0;
    if (data != null) {
        // Carried across iterations: without per-document sampling, each document starts
        // from the gamma fitted for the previous one.
        DenseMatrix gamma = gammad;
        for (Vector doc : data) {
            SparseVector sv = (SparseVector) doc;
            sv.removeZeroValues();
            if (drawGammaPerDoc) {
                gamma = LdaUtil.geneGamma(numTopic, gammaShape, random);
            }
            gamma = LdaUtil.getTopicDistributionMethod(sv, expELogBeta, alpha, gamma, numTopic).f0;
            DenseMatrix eLogTheta = LdaUtil.dirichletExpectationVec(gamma);

            // Word contributions, one per stored entry of the document.
            double docBound = 0;
            final int storedCount = sv.numberOfValues();
            final int[] wordIds = sv.getIndices();
            final double[] wordCounts = sv.getValues();
            for (int k = 0; k < storedCount; k++) {
                DenseMatrix wordELogBeta = new DenseMatrix(numTopic, 1, eLogBeta.getRow(wordIds[k]));
                docBound += wordCounts[k] * LdaUtil.logSumExp(eLogTheta.plus(wordELogBeta));
            }

            // Remaining per-document terms, added in the same order as before.
            double weightedTerm = LdaUtil.elementWiseProduct(alpha.minus(gamma), eLogTheta).sum();
            docBound += weightedTerm;
            double logGammaDiff = LdaUtil.logGamma(gamma).minus(LdaUtil.logGamma(alpha)).sum();
            docBound += logGammaDiff;
            double logGammaOfSums = LdaUtil.logGamma(alpha.sum()) - LdaUtil.logGamma(gamma.sum());
            docBound += logGammaOfSums;

            corpusPart += docBound;
        }
    }

    final double sumEta = beta * vocabularySize;
    final double weightedTopicTerm =
        LdaUtil.elementWiseProduct(lambda.transpose().plus(-beta).scale(-1), eLogBeta).sum();
    final double logGammaLambdaTerm =
        LdaUtil.logGamma(lambda.transpose()).plus(-LdaUtil.logGamma(beta)).sum();
    final double logGammaRowSumTerm =
        LdaUtil.logGamma(LdaUtil.sumByRow(lambda.transpose())).plus(-LdaUtil.logGamma(sumEta)).sum();
    final double topicsPart = weightedTopicTerm + logGammaLambdaTerm - logGammaRowSumTerm;

    return corpusPart + topicsPart / taskNum;
}
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class VectorFunctionMapper method procMaxMin.
/**
 * Computes the max / min value of a vector, or the index where it occurs, depending on
 * {@code funcName} (Max, Min, ArgMax, ArgMin).
 *
 * <p>For a sparse vector the positions that are not stored count as 0.0, so that the answer agrees
 * with the dense representation of the same vector. This only applies when the reported size is
 * larger than the number of stored entries; a size that is not larger (for example a negative
 * "unknown" size, if the vector type uses one) leaves the stored entries as the only candidates.
 *
 * <p>Comparisons are strict, so the first extreme entry encountered wins ties and NaN entries are
 * never selected.
 *
 * @param input object convertible to a vector by {@code VectorUtil.getVector}.
 * @return the parsed vector itself (possibly null) when it is null or of size 0; otherwise the
 *         result rendered with {@code String.valueOf}: the index for ArgMax/ArgMin, the value for
 *         Max/Min.
 * @throws UnsupportedOperationException if {@code funcName} is none of Max, Min, ArgMax, ArgMin.
 */
protected Object procMaxMin(Object input) {
    Vector vectorInput = VectorUtil.getVector(input);
    if (vectorInput == null || vectorInput.size() == 0) {
        return vectorInput;
    }
    boolean equalMin = funcName.equals(HasFuncName.FuncName.ArgMin) || funcName.equals(HasFuncName.FuncName.Min);
    boolean equalMax = funcName.equals(HasFuncName.FuncName.ArgMax) || funcName.equals(HasFuncName.FuncName.Max);
    if (!equalMax && !equalMin) {
        // Still a RuntimeException, so existing callers that catch RuntimeException keep working.
        throw new UnsupportedOperationException("Not implemented yet!");
    }

    int dstIdx = 0;
    double dstVal = equalMax ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
    if (vectorInput instanceof SparseVector) {
        SparseVector sv = (SparseVector) vectorInput;
        int[] indices = sv.getIndices();
        double[] values = sv.getValues();
        int stored = sv.numberOfValues();
        for (int i = 0; i < stored; ++i) {
            if (equalMax ? dstVal < values[i] : dstVal > values[i]) {
                dstVal = values[i];
                dstIdx = indices[i];
            }
        }
        // More positions than stored entries means at least one implicit 0.0 takes part.
        if (sv.size() > stored) {
            boolean zeroWins = equalMax ? dstVal < 0.0 : dstVal > 0.0;
            if (zeroWins || dstVal == 0.0) {
                int zeroIdx = firstImplicitZeroIndex(indices, stored);
                if (zeroWins) {
                    dstVal = 0.0;
                    dstIdx = zeroIdx;
                } else if (zeroIdx < dstIdx) {
                    // Tie with a stored zero: report the earliest position, as a dense scan would.
                    dstIdx = zeroIdx;
                }
            }
        }
    } else {
        DenseVector dv = (DenseVector) vectorInput;
        for (int i = 0; i < dv.size(); ++i) {
            double candidate = dv.get(i);
            if (equalMax ? dstVal < candidate : dstVal > candidate) {
                dstVal = candidate;
                dstIdx = i;
            }
        }
    }

    if (funcName.equals(HasFuncName.FuncName.ArgMax) || funcName.equals(HasFuncName.FuncName.ArgMin)) {
        return String.valueOf(dstIdx);
    } else {
        return String.valueOf(dstVal);
    }
}

/**
 * Returns the smallest non-negative index that does not occur among the first {@code count}
 * entries of {@code indices}. The entries are sorted on a copy, so no ordering of the stored
 * indices is required and the input array is left untouched.
 */
private static int firstImplicitZeroIndex(int[] indices, int count) {
    int[] sorted = java.util.Arrays.copyOf(indices, count);
    java.util.Arrays.sort(sorted);
    int candidate = 0;
    for (int storedIndex : sorted) {
        if (storedIndex == candidate) {
            candidate++;
        } else if (storedIndex > candidate) {
            break;
        }
        // storedIndex < candidate: duplicate or negative entry, ignore it.
    }
    return candidate;
}
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class VectorInteractionMapper method map.
/**
 * Builds the interaction vector (all pairwise products) of exactly two input vectors.
 *
 * <p>Both inputs must be of the same kind. Two sparse vectors give a sparse result of size
 * {@code size1 * size2}; two dense vectors give a dense result of length {@code len1 * len2}.
 *
 * <p>NOTE(review): the two branches place the products differently. The sparse branch writes the
 * product of entries (idx1, idx2) to position {@code size1 * idx2 + idx1}, whereas the dense branch
 * writes the product of (i, j) to position {@code i * len2 + j}. Confirm whether this is intended.
 *
 * @param input the two input columns.
 * @return the interaction vector, or null if either input is null.
 * @throws IllegalArgumentException if the number of inputs is not 2, or one vector is sparse and
 *                                  the other dense.
 */
@Override
protected Object map(Object[] input) {
    if (input.length != 2) {
        throw new IllegalArgumentException("VectorInteraction only support two input columns.");
    }
    if (input[0] == null || input[1] == null) {
        return null;
    }

    final Vector first = VectorUtil.getVector(input[0]);
    final Vector second = VectorUtil.getVector(input[1]);

    final boolean firstIsSparse = first instanceof SparseVector;
    final boolean kindMismatch = firstIsSparse ? second instanceof DenseVector : second instanceof SparseVector;
    if (kindMismatch) {
        throw new IllegalArgumentException("Make sure the two input vectors are both dense or sparse.");
    }

    if (!firstIsSparse) {
        // Dense x dense: row-major outer product, first vector is the slow axis.
        final double[] leftData = ((DenseVector) first).getData();
        final double[] rightData = ((DenseVector) second).getData();
        final DenseVector product = new DenseVector(leftData.length * rightData.length);
        final double[] out = product.getData();
        int slot = 0;
        for (int i = 0; i < leftData.length; ++i) {
            for (int j = 0; j < rightData.length; ++j) {
                out[slot] = leftData[i] * rightData[j];
                slot++;
            }
        }
        return product;
    }

    // Sparse x sparse: one output entry per pair of stored entries.
    final SparseVector left = (SparseVector) first;
    final int leftSize = left.size();
    final int[] leftIdx = left.getIndices();
    final double[] leftVal = left.getValues();

    final SparseVector right = (SparseVector) second;
    final int rightSize = right.size();
    final int[] rightIdx = right.getIndices();
    final double[] rightVal = right.getValues();

    final double[] outValues = new double[rightIdx.length * leftIdx.length];
    final int[] outIndices = new int[rightIdx.length * leftIdx.length];
    int slot = 0;
    for (int i = 0; i < leftIdx.length; ++i) {
        for (int j = 0; j < rightIdx.length; ++j) {
            outIndices[slot] = leftSize * rightIdx[j] + leftIdx[i];
            outValues[slot] = leftVal[i] * rightVal[j];
            slot++;
        }
    }
    return new SparseVector(leftSize * rightSize, outIndices, outValues);
}
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class VectorMinMaxScalerModelDataConverter method serializeModel.
/**
 * Serialize the model data to "Tuple3<Params, Iterable<String>, Iterable<Row>>".
 *
 * <p>The string list holds two JSON arrays, the per-dimension minimums followed by the
 * per-dimension maximums; sparse summary vectors are converted to dense first. The row list is
 * always empty.
 *
 * @param modelData f0 and f1 are stored under the MIN and MAX params; f2 is the vector summary
 *                  supplying the per-dimension min and max.
 * @return The serialization result.
 */
public Tuple3<Params, Iterable<String>, Iterable<Row>> serializeModel(Tuple3<Double, Double, BaseVectorSummary> modelData) {
    final double lowerBound = modelData.f0;
    final double upperBound = modelData.f1;
    final BaseVectorSummary stats = modelData.f2;

    final double[] eMins = stats.min() instanceof DenseVector
        ? ((DenseVector) stats.min()).getData()
        : ((SparseVector) stats.min()).toDenseVector().getData();
    final double[] eMaxs = stats.max() instanceof DenseVector
        ? ((DenseVector) stats.max()).getData()
        : ((SparseVector) stats.max()).toDenseVector().getData();

    // Order matters for the reader of the model: mins first, then maxs.
    List<String> data = new ArrayList<>();
    data.add(JsonConverter.toJson(eMins));
    data.add(JsonConverter.toJson(eMaxs));

    Params meta = new Params()
        .set(VectorMinMaxScalerTrainParams.MIN, lowerBound)
        .set(VectorMinMaxScalerTrainParams.MAX, upperBound)
        .set(VectorMinMaxScalerTrainParams.SELECTED_COL, vectorColName);

    return Tuple3.of(meta, data, new ArrayList<>());
}
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class VectorStandardScalerModelDataConverter method serializeModel.
/**
 * Serialize the model data to "Tuple3<Params, Iterable<String>, Iterable<Row>>".
 *
 * <p>The string list holds two JSON arrays: the means followed by the standard deviations. When
 * the mean flag is off the means are all 0; when the std flag is off the standard deviations are
 * all 1. Sparse summary vectors are converted to dense first. The row list is always empty.
 *
 * @param modelData f0 is the with-mean flag, f1 the with-std flag, f2 the vector summary.
 * @return The serialization result.
 */
public Tuple3<Params, Iterable<String>, Iterable<Row>> serializeModel(Tuple3<Boolean, Boolean, BaseVectorSummary> modelData) {
    final Boolean withMean = modelData.f0;
    final Boolean withStd = modelData.f1;
    final BaseVectorSummary stats = modelData.f2;
    final int dim = stats.vectorSize();

    final double[] means;
    if (!withMean) {
        // Centering disabled: freshly allocated array, i.e. all zeros.
        means = new double[dim];
    } else if (stats.mean() instanceof DenseVector) {
        means = ((DenseVector) stats.mean()).getData();
    } else {
        means = ((SparseVector) stats.mean()).toDenseVector().getData();
    }

    final double[] stdDeviations;
    if (!withStd) {
        // Scaling disabled: every dimension gets 1.
        stdDeviations = new double[dim];
        Arrays.fill(stdDeviations, 1);
    } else if (stats.standardDeviation() instanceof DenseVector) {
        stdDeviations = ((DenseVector) stats.standardDeviation()).getData();
    } else {
        stdDeviations = ((SparseVector) stats.standardDeviation()).toDenseVector().getData();
    }

    // Order matters for the reader of the model: means first, then standard deviations.
    List<String> data = new ArrayList<>();
    data.add(JsonConverter.toJson(means));
    data.add(JsonConverter.toJson(stdDeviations));

    Params meta = new Params()
        .set(VectorStandardTrainParams.WITH_MEAN, withMean)
        .set(VectorStandardTrainParams.WITH_STD, withStd)
        .set(VectorMinMaxScalerTrainParams.SELECTED_COL, vectorColName);

    return Tuple3.of(meta, data, new ArrayList<>());
}
Aggregations