use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class MultivariateGaussian method logpdf.
/**
 * Returns the log-density of this multivariate Gaussian at the given point x.
 */
public double logpdf(Vector x) {
    DenseVector delta = threadLocalDelta.get();
    DenseVector v = threadLocalV.get();
    int n = mean.size();
    System.arraycopy(mean.getData(), 0, delta.getData(), 0, n);
    BLAS.scal(-1.0, delta);
    if (x instanceof DenseVector) {
        BLAS.axpy(1., (DenseVector) x, delta);
    } else if (x instanceof SparseVector) {
        BLAS.axpy(1., (SparseVector) x, delta);
    }
    // Note that beta is always zero here; otherwise the method would not be thread-safe.
    BLAS.gemv(1.0, rootSigmaInv, true, delta, 0., v);
    return u - 0.5 * BLAS.dot(v, v);
}
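The return value is the usual Gaussian log-density: with delta = x - mean and v = rootSigmaInv^T * delta, the quadratic form v^T v is the squared Mahalanobis distance, and u holds the precomputed log-normalization constant (-0.5 * (n * log(2π) + log|Σ|) for a non-degenerate Σ). A minimal self-contained sketch of the same formula for a diagonal covariance, not using Alink's API:

// Illustrative only: log-density of a multivariate Gaussian with diagonal
// covariance, mirroring the u - 0.5 * v'v structure of the method above.
public final class DiagGaussianLogPdf {

    public static double logpdf(double[] x, double[] mean, double[] var) {
        int n = mean.length;
        // u = -0.5 * (n * log(2*pi) + log|Sigma|); for a diagonal Sigma,
        // log|Sigma| is the sum of the log variances.
        double logDet = 0.0;
        for (double v : var) {
            logDet += Math.log(v);
        }
        double u = -0.5 * (n * Math.log(2 * Math.PI) + logDet);

        // v = rootSigmaInv^T * (x - mean); for a diagonal Sigma this is
        // (x_i - mean_i) / sqrt(var_i), so v'v is the squared Mahalanobis distance.
        double quad = 0.0;
        for (int i = 0; i < n; i++) {
            double d = x[i] - mean[i];
            quad += d * d / var[i];
        }
        return u - 0.5 * quad;
    }

    public static void main(String[] args) {
        // Standard 2-D Gaussian evaluated at the origin: expect -log(2*pi) ≈ -1.8379.
        System.out.println(logpdf(new double[] {0, 0}, new double[] {0, 0}, new double[] {1, 1}));
    }
}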
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class MinHashLSH method hashFunction.
/**
 * indices: indices of the entries in vec whose values are not zero.
 * <p>
 * hashValue = (((1 + indices) * randCoefficientA + randCoefficientB) % HASH_PRIME).min.
 * <p>
 * Here randCoefficientA and randCoefficientB are random numbers chosen uniformly from the range
 * [0, HASH_PRIME - 1].
 */
@Override
public int[] hashFunction(Vector vec) {
    int[] minHashSet = new int[randCoefficientsA.length];
    if (randCoefficientsA.length > 0) {
        int[] hashValues = new int[randCoefficientsA[0].length];
        if (vec instanceof SparseVector) {
            SparseVector elem = (SparseVector) vec;
            int[] indices = elem.getIndices();
            for (int i = 0; i < minHashSet.length; i++) {
                for (int j = 0; j < hashValues.length; j++) {
                    int tmp = HASH_PRIME, cur;
                    for (int index : indices) {
                        cur = (int) ((1L + index) * randCoefficientsA[i][j] + randCoefficientsB[i][j]) % HASH_PRIME;
                        tmp = Math.min(tmp, cur);
                    }
                    hashValues[j] = tmp;
                }
                minHashSet[i] = tableHash(hashValues);
            }
        } else if (vec instanceof DenseVector) {
            double[] elem = ((DenseVector) vec).getData();
            for (int i = 0; i < minHashSet.length; i++) {
                for (int j = 0; j < hashValues.length; j++) {
                    int tmp = HASH_PRIME, cur;
                    for (int m = 0; m < elem.length; m++) {
                        if (elem[m] != 0) {
                            cur = (int) ((1L + m) * randCoefficientsA[i][j] + randCoefficientsB[i][j]) % HASH_PRIME;
                            tmp = Math.min(tmp, cur);
                        }
                    }
                    hashValues[j] = tmp;
                }
                minHashSet[i] = tableHash(hashValues);
            }
        }
    }
    return minHashSet;
}
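Aside from the tableHash packing, the loops above are the textbook MinHash signature: each coefficient pair (a, b) defines a hash h(index) = ((1 + index) * a + b) % HASH_PRIME, and the signature keeps the minimum of that hash over the non-zero indices. A simplified standalone sketch under those assumptions (flat coefficient arrays and an illustrative prime, not Alink's actual constants or its tableHash step):

import java.util.Random;

// Illustrative MinHash signature over a set of non-zero indices, following the
// hashValue = min(((1 + index) * a + b) % HASH_PRIME) formula from the javadoc.
public final class MinHashSketch {
    private static final int HASH_PRIME = 2038074743; // illustrative prime, not necessarily Alink's

    private final long[] coefA;
    private final long[] coefB;

    public MinHashSketch(int numHashes, long seed) {
        Random rnd = new Random(seed);
        coefA = new long[numHashes];
        coefB = new long[numHashes];
        for (int i = 0; i < numHashes; i++) {
            coefA[i] = 1 + rnd.nextInt(HASH_PRIME - 1);
            coefB[i] = rnd.nextInt(HASH_PRIME - 1);
        }
    }

    public int[] signature(int[] nonZeroIndices) {
        int[] sig = new int[coefA.length];
        for (int i = 0; i < coefA.length; i++) {
            int min = HASH_PRIME;
            for (int index : nonZeroIndices) {
                int cur = (int) (((1L + index) * coefA[i] + coefB[i]) % HASH_PRIME);
                min = Math.min(min, cur);
            }
            sig[i] = min;
        }
        return sig;
    }
}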
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class DocCountVectorizerModelMapperTest method testWordCountType.
@Test
public void testWordCountType() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"minTF\":\"1.0\",\"featureType\":\"\\\"WORD_COUNT\\\"\"}"),
        Row.of(1048576L, "{\"f0\":\"i\",\"f1\":0.6931471805599453,\"f2\":6}"),
        Row.of(2097152L, "{\"f0\":\"e\",\"f1\":0.1823215567939546,\"f2\":2}"),
        Row.of(3145728L, "{\"f0\":\"a\",\"f1\":0.4054651081081644,\"f2\":0}"),
        Row.of(4194304L, "{\"f0\":\"b\",\"f1\":0.1823215567939546,\"f2\":1}"),
        Row.of(5242880L, "{\"f0\":\"c\",\"f1\":0.6931471805599453,\"f2\":7}"),
        Row.of(6291456L, "{\"f0\":\"h\",\"f1\":0.4054651081081644,\"f2\":3}"),
        Row.of(7340032L, "{\"f0\":\"d\",\"f1\":0.6931471805599453,\"f2\":4}"),
        Row.of(8388608L, "{\"f0\":\"j\",\"f1\":0.6931471805599453,\"f2\":5}"),
        Row.of(9437184L, "{\"f0\":\"g\",\"f1\":0.6931471805599453,\"f2\":8}"),
        Row.of(10485760L, "{\"f0\":\"n\",\"f1\":1.0986122886681098,\"f2\":9}"),
        Row.of(11534336L, "{\"f0\":\"f\",\"f1\":1.0986122886681098,\"f2\":10}")
    };
    List<Row> model = Arrays.asList(rows);
    Params params = new Params().set(DocCountVectorizerPredictParams.SELECTED_COL, "sentence");
    DocCountVectorizerModelMapper mapper = new DocCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);
    assertEquals(mapper.map(Row.of("a b c d e a a")).getField(0),
        new SparseVector(11, new int[] { 0, 1, 2, 4, 7 }, new double[] { 3.0, 1.0, 1.0, 1.0, 1.0 }));
    assertEquals(mapper.getOutputSchema(),
        new TableSchema(new String[] { "sentence" }, new TypeInformation[] { VectorTypes.SPARSE_VECTOR }));
}
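The expected vector follows from the model rows, where f0 is the token and f2 its index: "a"→0, "b"→1, "e"→2, "d"→4, "c"→7. With featureType WORD_COUNT the values are raw term counts, so "a b c d e a a" produces 3.0 at index 0 and 1.0 at indices 1, 2, 4 and 7. A small illustrative reconstruction of that counting step (a hypothetical helper, not the mapper's internals):

import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

// Illustrative reconstruction of the expected WORD_COUNT vector in the test above.
public final class WordCountExample {
    public static void main(String[] args) {
        // Token-to-index mapping taken from the model rows in the test.
        Map<String, Integer> vocab = new HashMap<>();
        vocab.put("a", 0);
        vocab.put("b", 1);
        vocab.put("e", 2);
        vocab.put("d", 4);
        vocab.put("c", 7);

        // Count tokens of the input document, keeping indices sorted for the sparse vector.
        Map<Integer, Double> counts = new TreeMap<>();
        for (String token : "a b c d e a a".split(" ")) {
            Integer idx = vocab.get(token);
            if (idx != null) {
                counts.merge(idx, 1.0, Double::sum);
            }
        }
        // Prints {0=3.0, 1=1.0, 2=1.0, 4=1.0, 7=1.0}, matching the asserted SparseVector.
        System.out.println(counts);
    }
}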
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class DocCountVectorizerModelMapperTest method testMinTF.
@Test
public void testMinTF() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"minTF\":\"0.2\",\"featureType\":\"\\\"BINARY\\\"\"}"),
        Row.of(1048576L, "{\"f0\":\"i\",\"f1\":0.6931471805599453,\"f2\":6}"),
        Row.of(2097152L, "{\"f0\":\"e\",\"f1\":0.1823215567939546,\"f2\":2}"),
        Row.of(3145728L, "{\"f0\":\"a\",\"f1\":0.4054651081081644,\"f2\":0}"),
        Row.of(4194304L, "{\"f0\":\"b\",\"f1\":0.1823215567939546,\"f2\":1}"),
        Row.of(5242880L, "{\"f0\":\"c\",\"f1\":0.6931471805599453,\"f2\":7}"),
        Row.of(6291456L, "{\"f0\":\"h\",\"f1\":0.4054651081081644,\"f2\":3}"),
        Row.of(7340032L, "{\"f0\":\"d\",\"f1\":0.6931471805599453,\"f2\":4}"),
        Row.of(8388608L, "{\"f0\":\"j\",\"f1\":0.6931471805599453,\"f2\":5}"),
        Row.of(9437184L, "{\"f0\":\"g\",\"f1\":0.6931471805599453,\"f2\":8}"),
        Row.of(10485760L, "{\"f0\":\"n\",\"f1\":1.0986122886681098,\"f2\":9}"),
        Row.of(11534336L, "{\"f0\":\"f\",\"f1\":1.0986122886681098,\"f2\":10}")
    };
    List<Row> model = Arrays.asList(rows);
    Params params = new Params().set(DocCountVectorizerPredictParams.SELECTED_COL, "sentence");
    DocCountVectorizerModelMapper mapper = new DocCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);
    assertEquals(mapper.map(Row.of("a b c d e a a b e")).getField(0),
        new SparseVector(11, new int[] { 0, 1, 2 }, new double[] { 1.0, 1.0, 1.0 }));
    assertEquals(mapper.map(Row.of("a b c d")).getField(0),
        new SparseVector(11, new int[] { 0, 1, 4, 7 }, new double[] { 1.0, 1.0, 1.0, 1.0 }));
}
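Both assertions are consistent with a minTF below 1 being read as a fraction of the document's token count: "a b c d e a a b e" has 9 tokens, so the threshold is 1.8 and only a (3), b (2) and e (2) survive, while "a b c d" has 4 tokens, threshold 0.8, so every term survives; BINARY then sets each kept value to 1.0. A hedged sketch of that filter, assuming the fractional interpretation:

// Illustrative check of the minTF filtering in the assertions above, assuming a
// minTF below 1 is treated as a fraction of the document's token count.
public final class MinTfExample {
    static boolean keep(int termCount, int docLength, double minTF) {
        double threshold = minTF < 1.0 ? minTF * docLength : minTF;
        return termCount >= threshold;
    }

    public static void main(String[] args) {
        // "a b c d e a a b e": 9 tokens, threshold 1.8 -> only a(3), b(2), e(2) survive.
        System.out.println(keep(3, 9, 0.2)); // a -> true
        System.out.println(keep(2, 9, 0.2)); // b, e -> true
        System.out.println(keep(1, 9, 0.2)); // c, d -> false
        // "a b c d": 4 tokens, threshold 0.8 -> every term (count 1) survives.
        System.out.println(keep(1, 4, 0.2)); // true
    }
}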
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class DocHashCountVectorizerModelMapperTest method testBinary.
@Test
public void testBinary() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"numFeatures\":\"20\",\"minTF\":\"1.0\",\"featureType\":\"\\\"BINARY\\\"\"}"),
        Row.of(1048576L, "{\"16\":0.4054651081081644,\"7\":0.0,\"13\":0.4054651081081644,\"14\":-0.5108256237659907,"
            + "\"15\":-0.2876820724517809}")
    };
    List<Row> model = Arrays.asList(rows);
    Params params = new Params().set(DocHashCountVectorizerPredictParams.SELECTED_COL, "sentence");
    DocHashCountVectorizerModelMapper mapper = new DocHashCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);
    assertEquals(mapper.map(Row.of("a b c d a a ")).getField(0),
        new SparseVector(20, new int[] { 7, 13, 14, 15 }, new double[] { 1.0, 1.0, 1.0, 1.0 }));
}
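Here the model keys are hashed bucket indices rather than tokens, and with BINARY every occupied bucket gets value 1.0, which is why "a b c d a a " maps to four ones at buckets 7, 13, 14 and 15 out of numFeatures = 20. The exact hash Alink applies is not shown in this snippet, so the sketch below only illustrates the hashing-trick idea with a placeholder hash whose buckets will differ:

import java.util.TreeSet;

// Illustrative hashing-trick sketch with a BINARY feature type. The hash below
// (floorMod of String.hashCode) is a placeholder, not the hash Alink actually uses,
// so the bucket indices will differ from the test's expected {7, 13, 14, 15}.
public final class HashingTrickExample {
    public static void main(String[] args) {
        int numFeatures = 20;
        TreeSet<Integer> buckets = new TreeSet<>();
        for (String token : "a b c d a a ".split(" ")) {
            if (!token.isEmpty()) {
                buckets.add(Math.floorMod(token.hashCode(), numFeatures));
            }
        }
        // With BINARY, every occupied bucket gets value 1.0 regardless of the count.
        System.out.println(buckets);
    }
}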