Search in sources :

Example 71 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class MultivariateGaussian method logpdf.

/**
 * Returns the log-density of this multivariate Gaussian at the given point x.
 * <p>
 * The difference (x - mean) is built in a thread-local scratch vector, multiplied by
 * {@code rootSigmaInv} through {@code BLAS.gemv} into a second thread-local scratch vector, and
 * the result is {@code u - 0.5 * dot(v, v)} where v is that product.
 *
 * @param x the point to evaluate; only {@code DenseVector} and {@code SparseVector} are added to
 *          the scratch vector, any other implementation leaves it at {@code -mean}
 * @return {@code u} minus half the squared norm of the transformed difference
 */
public double logpdf(Vector x) {
    DenseVector centered = threadLocalDelta.get();
    DenseVector transformed = threadLocalV.get();
    int dim = mean.size();

    // Start from -mean: copy the mean into the scratch buffer, then negate it in place.
    System.arraycopy(mean.getData(), 0, centered.getData(), 0, dim);
    BLAS.scal(-1.0, centered);

    // Add x on top, so the buffer now holds (x - mean).
    if (x instanceof SparseVector) {
        BLAS.axpy(1.0, (SparseVector) x, centered);
    } else if (x instanceof DenseVector) {
        BLAS.axpy(1.0, (DenseVector) x, centered);
    }

    // beta must stay zero: the output buffer is reused per thread, so its previous contents
    // must never contribute to the result (otherwise thread-safety is lost).
    // NOTE(review): the boolean flag presumably requests the transpose of rootSigmaInv - confirm.
    BLAS.gemv(1.0, rootSigmaInv, true, centered, 0.0, transformed);

    double squaredNorm = BLAS.dot(transformed, transformed);
    return u - 0.5 * squaredNorm;
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector)

Example 72 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class MinHashLSH method hashFunction.

/**
 * Computes one MinHash bucket value per hash table for the given vector.
 * <p>
 * indices: indexes of data in vec whose values are not zero (for a sparse vector, every stored
 * index is used as-is).
 * <p>
 * hashValue = (((1 + indices) * randCoefficientA + randCoefficientB) % HASH_PRIME).min.
 * <p>
 * Here randCoefficientA and randCoefficientB are chosen uniformly from the range [0,
 * HASH_PRIME-1].
 * <p>
 * If no index contributes, each hash value stays at HASH_PRIME. A vector that is neither a
 * {@code SparseVector} nor a {@code DenseVector} yields an all-zero result.
 *
 * @param vec the vector to hash
 * @return an array with one entry per row of {@code randCoefficientsA}, each produced by
 *         {@code tableHash} from that row's min-hash values
 */
@Override
public int[] hashFunction(Vector vec) {
    int[] minHashSet = new int[randCoefficientsA.length];
    if (randCoefficientsA.length == 0) {
        return minHashSet;
    }

    // Resolve the contributing indices once instead of rescanning the vector for every (i, j).
    int[] indices;
    if (vec instanceof SparseVector) {
        indices = ((SparseVector) vec).getIndices();
    } else if (vec instanceof DenseVector) {
        indices = nonZeroIndices(((DenseVector) vec).getData());
    } else {
        return minHashSet;
    }

    int[] hashValues = new int[randCoefficientsA[0].length];
    for (int i = 0; i < minHashSet.length; i++) {
        for (int j = 0; j < hashValues.length; j++) {
            int tmp = HASH_PRIME;
            for (int index : indices) {
                // Take the modulo on the full-width value and only then narrow to int.
                // A cast binds tighter than %, so casting first would truncate the product
                // and could produce negative hash values.
                int cur = (int) (((1L + index) * randCoefficientsA[i][j] + randCoefficientsB[i][j]) % HASH_PRIME);
                tmp = Math.min(tmp, cur);
            }
            hashValues[j] = tmp;
        }
        minHashSet[i] = tableHash(hashValues);
    }
    return minHashSet;
}

/**
 * Returns the positions of all entries in {@code values} that are not equal to zero, in
 * ascending order.
 */
private static int[] nonZeroIndices(double[] values) {
    int count = 0;
    for (double value : values) {
        if (value != 0) {
            count++;
        }
    }
    int[] result = new int[count];
    int next = 0;
    for (int m = 0; m < values.length; m++) {
        if (values[m] != 0) {
            result[next++] = m;
        }
    }
    return result;
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector)

Example 73 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class DocCountVectorizerModelMapperTest method testWordCountType.

/**
 * Loads a model whose meta row declares featureType WORD_COUNT and minTF 1.0, maps one sentence
 * and checks both the produced sparse vector and the mapper's output schema.
 */
@Test
public void testWordCountType() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"minTF\":\"1.0\",\"featureType\":\"\\\"WORD_COUNT\\\"\"}"),
        Row.of(1048576L, "{\"f0\":\"i\",\"f1\":0.6931471805599453,\"f2\":6}"),
        Row.of(2097152L, "{\"f0\":\"e\",\"f1\":0.1823215567939546,\"f2\":2}"),
        Row.of(3145728L, "{\"f0\":\"a\",\"f1\":0.4054651081081644,\"f2\":0}"),
        Row.of(4194304L, "{\"f0\":\"b\",\"f1\":0.1823215567939546,\"f2\":1}"),
        Row.of(5242880L, "{\"f0\":\"c\",\"f1\":0.6931471805599453,\"f2\":7}"),
        Row.of(6291456L, "{\"f0\":\"h\",\"f1\":0.4054651081081644,\"f2\":3}"),
        Row.of(7340032L, "{\"f0\":\"d\",\"f1\":0.6931471805599453,\"f2\":4}"),
        Row.of(8388608L, "{\"f0\":\"j\",\"f1\":0.6931471805599453,\"f2\":5}"),
        Row.of(9437184L, "{\"f0\":\"g\",\"f1\":0.6931471805599453,\"f2\":8}"),
        Row.of(10485760L, "{\"f0\":\"n\",\"f1\":1.0986122886681098,\"f2\":9}"),
        Row.of(11534336L, "{\"f0\":\"f\",\"f1\":1.0986122886681098,\"f2\":10}")
    };
    List<Row> model = Arrays.asList(rows);
    Params params = new Params().set(DocCountVectorizerPredictParams.SELECTED_COL, "sentence");
    DocCountVectorizerModelMapper mapper = new DocCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    // "a" occurs three times, "b", "e", "d" and "c" once each.
    // NOTE(review): f2 in the model rows appears to be the word's vector index - confirm.
    SparseVector expectedVector =
        new SparseVector(11, new int[] { 0, 1, 2, 4, 7 }, new double[] { 3.0, 1.0, 1.0, 1.0, 1.0 });
    assertEquals(expectedVector, mapper.map(Row.of("a b c d e a a")).getField(0));

    TableSchema expectedSchema =
        new TableSchema(new String[] { "sentence" }, new TypeInformation[] { VectorTypes.SPARSE_VECTOR });
    assertEquals(expectedSchema, mapper.getOutputSchema());
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) DocCountVectorizerPredictParams(com.alibaba.alink.params.nlp.DocCountVectorizerPredictParams) Params(org.apache.flink.ml.api.misc.param.Params) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 74 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class DocCountVectorizerModelMapperTest method testMinTF.

/**
 * Loads a model whose meta row declares featureType BINARY and minTF 0.2, then checks the
 * sparse vectors produced for two sentences.
 */
@Test
public void testMinTF() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"minTF\":\"0.2\",\"featureType\":\"\\\"BINARY\\\"\"}"),
        Row.of(1048576L, "{\"f0\":\"i\",\"f1\":0.6931471805599453,\"f2\":6}"),
        Row.of(2097152L, "{\"f0\":\"e\",\"f1\":0.1823215567939546,\"f2\":2}"),
        Row.of(3145728L, "{\"f0\":\"a\",\"f1\":0.4054651081081644,\"f2\":0}"),
        Row.of(4194304L, "{\"f0\":\"b\",\"f1\":0.1823215567939546,\"f2\":1}"),
        Row.of(5242880L, "{\"f0\":\"c\",\"f1\":0.6931471805599453,\"f2\":7}"),
        Row.of(6291456L, "{\"f0\":\"h\",\"f1\":0.4054651081081644,\"f2\":3}"),
        Row.of(7340032L, "{\"f0\":\"d\",\"f1\":0.6931471805599453,\"f2\":4}"),
        Row.of(8388608L, "{\"f0\":\"j\",\"f1\":0.6931471805599453,\"f2\":5}"),
        Row.of(9437184L, "{\"f0\":\"g\",\"f1\":0.6931471805599453,\"f2\":8}"),
        Row.of(10485760L, "{\"f0\":\"n\",\"f1\":1.0986122886681098,\"f2\":9}"),
        Row.of(11534336L, "{\"f0\":\"f\",\"f1\":1.0986122886681098,\"f2\":10}")
    };
    List<Row> model = Arrays.asList(rows);
    Params params = new Params().set(DocCountVectorizerPredictParams.SELECTED_COL, "sentence");
    DocCountVectorizerModelMapper mapper = new DocCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    // Nine tokens: only three indices are expected in the output.
    // NOTE(review): presumably "c" and "d" (one occurrence each) fall below minTF - confirm.
    SparseVector expectedFiltered =
        new SparseVector(11, new int[] { 0, 1, 2 }, new double[] { 1.0, 1.0, 1.0 });
    assertEquals(expectedFiltered, mapper.map(Row.of("a b c d e a a b e")).getField(0));

    // Four tokens, one occurrence each: all four indices are expected in the output.
    SparseVector expectedAll =
        new SparseVector(11, new int[] { 0, 1, 4, 7 }, new double[] { 1.0, 1.0, 1.0, 1.0 });
    assertEquals(expectedAll, mapper.map(Row.of("a b c d")).getField(0));
}
Also used : DocCountVectorizerPredictParams(com.alibaba.alink.params.nlp.DocCountVectorizerPredictParams) Params(org.apache.flink.ml.api.misc.param.Params) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Example 75 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class DocHashCountVectorizerModelMapperTest method testBinary.

/**
 * Loads a hashing-vectorizer model whose meta row declares numFeatures 20, minTF 1.0 and
 * featureType BINARY, then checks the sparse vector produced for one sentence.
 */
@Test
public void testBinary() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"numFeatures\":\"20\",\"minTF\":\"1.0\",\"featureType\":\"\\\"BINARY\\\"\"}"),
        Row.of(1048576L, "{\"16\":0.4054651081081644,\"7\":0.0,\"13\":0.4054651081081644,\"14\":-0.5108256237659907," + "\"15\":-0.2876820724517809}")
    };
    List<Row> model = Arrays.asList(rows);
    Params params = new Params().set(DocHashCountVectorizerPredictParams.SELECTED_COL, "sentence");
    DocHashCountVectorizerModelMapper mapper = new DocHashCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    // The repeated "a" still maps to a single 1.0 entry in the expected vector.
    SparseVector expected =
        new SparseVector(20, new int[] { 7, 13, 14, 15 }, new double[] { 1.0, 1.0, 1.0, 1.0 });
    assertEquals(expected, mapper.map(Row.of("a b c d a a ")).getField(0));
}
Also used : DocCountVectorizerTrainParams(com.alibaba.alink.params.nlp.DocCountVectorizerTrainParams) DocHashCountVectorizerPredictParams(com.alibaba.alink.params.nlp.DocHashCountVectorizerPredictParams) Params(org.apache.flink.ml.api.misc.param.Params) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Aggregations

SparseVector (com.alibaba.alink.common.linalg.SparseVector)125 Test (org.junit.Test)63 DenseVector (com.alibaba.alink.common.linalg.DenseVector)60 Params (org.apache.flink.ml.api.misc.param.Params)45 Row (org.apache.flink.types.Row)45 Vector (com.alibaba.alink.common.linalg.Vector)40 TableSchema (org.apache.flink.table.api.TableSchema)27 ArrayList (java.util.ArrayList)21 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)15 HashMap (java.util.HashMap)12 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)12 List (java.util.List)11 DenseMatrix (com.alibaba.alink.common.linalg.DenseMatrix)10 MTable (com.alibaba.alink.common.MTable)7 BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary)6 CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp)6 Map (java.util.Map)6 MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp)5 VectorAssemblerParams (com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams)5 OneHotPredictParams (com.alibaba.alink.params.feature.OneHotPredictParams)5