Search in sources :

Example 26 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class VectorMaxAbsScalerMapperTest method testSparse.

/**
 * Checks that {@code VectorMaxAbsScalerModelMapper} maps a sparse input vector to the expected
 * scaled sparse vector.
 *
 * <p>The model is loaded from two rows, the second of which carries the string
 * {@code "[4.0,0.0,3.0]"}. The expected output (0.25 at index 0, 0.6666666666666666 at index 2)
 * is consistent with dividing the input entries 1.0 and 2.0 by 4.0 and 3.0 respectively.
 * {@code modelSchema} is defined outside this method.
 *
 * @throws Exception if loading the model or mapping the row fails
 */
@Test
public void testSparse() throws Exception {
    Row[] rows = new Row[] { Row.of(0L, "{}", null), Row.of(1048576L, "[4.0,0.0,3.0]", null) };
    List<Row> model = Arrays.asList(rows);
    TableSchema dataSchema = new TableSchema(new String[] { "vec" }, new TypeInformation<?>[] { Types.STRING });
    Params params = new Params();
    VectorMaxAbsScalerModelMapper mapper = new VectorMaxAbsScalerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    SparseVector input = new SparseVector(3, new int[] { 0, 2 }, new double[] { 1.0, 2.0 });
    SparseVector expected = new SparseVector(3, new int[] { 0, 2 }, new double[] { 0.25, 0.6666666666666666 });
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
    // failure message report the right value as "expected".
    assertEquals(expected, mapper.map(Row.of(input)).getField(0));
}
Also used : TableSchema(org.apache.flink.table.api.TableSchema) Params(org.apache.flink.ml.api.misc.param.Params) Row(org.apache.flink.types.Row) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Test(org.junit.Test)

Example 27 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class KMeansUtilTest method updateSumMatrixSparseTest.

/**
 * Checks {@code KMeansUtil.updateSumMatrix} on sparse samples.
 *
 * <p>Builds {@code length} sparse vectors, each with a single non-zero entry of value
 * {@code i * i} at index {@code i % vectorSize}, feeds them one by one (weight 1) into
 * {@code updateSumMatrix}, and compares the accumulated {@code sumMatrix} with the expected
 * values. {@code modelRows} and {@code length} are defined outside this method.
 */
@Test
public void updateSumMatrixSparseTest() {
    int sampleWeight = 1;
    KMeansPredictModelData predictModelData = new KMeansModelDataConverter().load(modelRows);
    int vectorSize = predictModelData.params.vectorSize;
    int k = predictModelData.params.k;

    List<FastDistanceVectorData> samples = new ArrayList<>();
    for (int i = 0; i < length; i++) {
        Vector vec = new SparseVector(vectorSize, new int[] { i % vectorSize }, new double[] { i * i });
        samples.add(predictModelData.params.distanceType.getFastDistance().prepareVectorData(Tuple2.of(vec, null)));
    }

    DenseMatrix distanceMatrix = new DenseMatrix(k, 1);
    double[] sumMatrix = new double[k * (vectorSize + 1)];
    for (FastDistanceVectorData sample : samples) {
        KMeansUtil.updateSumMatrix(sample, sampleWeight, predictModelData.centroids, vectorSize, sumMatrix, k, predictModelData.params.distanceType.getFastDistance(), distanceMatrix);
    }

    // The original code called Arrays.equals(...) and discarded the result, so nothing was
    // actually verified. Assert the comparison instead. The fully qualified name avoids
    // depending on an import that is not visible from this method.
    double[] expected = new double[] { 117.0, 65.0, 89.0, 6.0, 9.0, 1.0, 4.0, 4.0 };
    org.junit.Assert.assertArrayEquals(expected, sumMatrix, 1e-9);
}
Also used : ArrayList(java.util.ArrayList) FastDistanceVectorData(com.alibaba.alink.operator.common.distance.FastDistanceVectorData) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Vector(com.alibaba.alink.common.linalg.Vector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseMatrix(com.alibaba.alink.common.linalg.DenseMatrix) Test(org.junit.Test)

Example 28 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class LocalKmeansFuncTest method kmeansSparseTest.

/**
 * Runs the local k-means routine on ten sparse samples and checks the resulting centroids.
 *
 * <p>Sample {@code i} has value {@code i} at indices {@code i} and {@code i + 1} of a
 * 20-dimensional vector and carries weight {@code i}. The test verifies that the returned
 * centroid matrix has {@code k} columns and that the L2 norms of the two centroid columns
 * match the reference values to within 0.001.
 */
@Test
public void kmeansSparseTest() {
    int len = 10;
    int k = 2;
    int size = 20;
    EuclideanDistance distance = new EuclideanDistance();
    long[] sampleWeights = new long[len];
    FastDistanceVectorData[] samples = new FastDistanceVectorData[len];
    // Iterate up to len (not a repeated literal) so the loop stays in sync with the array sizes.
    for (int i = 0; i < len; i++) {
        sampleWeights[i] = i;
        samples[i] = distance.prepareVectorData(Tuple2.of(new SparseVector(size, new int[] { i, i + 1 }, new double[] { i, i }), null));
    }
    FastDistanceMatrixData initCentroid = kmeans(k, sampleWeights, samples, distance, size, 0);
    DenseMatrix initCentroidData = initCentroid.getVectors();
    // JUnit's argument order is (expected, actual[, delta]).
    Assert.assertEquals(k, initCentroidData.numCols());
    Assert.assertEquals(8.615, new DenseVector(initCentroidData.getColumn(0)).normL2(), 0.001);
    Assert.assertEquals(4.128, new DenseVector(initCentroidData.getColumn(1)).normL2(), 0.001);
}
Also used : FastDistanceMatrixData(com.alibaba.alink.operator.common.distance.FastDistanceMatrixData) EuclideanDistance(com.alibaba.alink.operator.common.distance.EuclideanDistance) FastDistanceVectorData(com.alibaba.alink.operator.common.distance.FastDistanceVectorData) SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseVector(com.alibaba.alink.common.linalg.DenseVector) DenseMatrix(com.alibaba.alink.common.linalg.DenseMatrix) Test(org.junit.Test)

Example 29 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class LdaUtilTest method getTopicDistributionMethodTest.

/**
 * Checks two entries of the result of {@code LdaUtil.getTopicDistributionMethod} against
 * reference values.
 *
 * <p>An 11 x 5 {@code lambda} matrix is passed through
 * {@code LdaUtil.expDirichletExpectation}; the result, together with 5 x 1 {@code alpha} and
 * {@code gammad} matrices and an 11-dimensional sparse vector, is fed to
 * {@code getTopicDistributionMethod}. The test then compares element (3, 0) of the first
 * returned matrix and element (2, 2) of the second with fixed expected values.
 */
@Test
public void getTopicDistributionMethodTest() {
    int row = 11;
    int col = 5;
    double[] temp = new double[] { 0.8936825549031158, 0.9650683744577933, 1.1760851442955271, 0.889011463028263, 1.0355502890838704, 1.1720254142865503, 0.8496512959061578, 1.1564109073902848, 0.8528198328651976, 1.072261907065107, 1.0112487630821958, 1.0288027427394206, 1.1256918577237478, 1.0641131417250107, 0.9830788207753957, 0.9519235842178695, 1.0531103642783968, 1.0846663792488604, 0.9317316401779444, 0.9816247167440154, 0.953061129524052, 0.8836097897537777, 0.8539728772760822, 1.109432137460693, 0.9801693423689286, 0.9385725168762017, 1.009886079821316, 0.9741390218380398, 0.8734624459614093, 0.8548583255850564, 0.8934120594879987, 1.0200469492393616, 0.9461610896051537, 1.1912819895664948, 0.9650275833536232, 0.9312815665885328, 0.984681817963758, 1.1412711858668625, 1.1159082714127344, 1.0219124026668207, 1.1052645047308647, 1.1380919062139254, 0.9684793634316371, 1.023922805813918, 1.0777999541431174, 0.8730213177341947, 1.0353598060502658, 1.047104264664753, 1.1284793487722498, 0.8898021261569816, 1.1634869627283706, 0.817874601150865, 1.0424867867765728, 1.167773175905418, 0.915224402643435 };
    DenseMatrix lambda = new DenseMatrix(row, col, temp, false);
    DenseMatrix expElogbeta = LdaUtil.expDirichletExpectation(lambda);
    DenseMatrix alpha = new DenseMatrix(5, 1, new double[] { 0.2, 0.3, 0.4, 0.5, 0.6 });
    DenseMatrix gammad = new DenseMatrix(5, 1, new double[] { 0.7, 0.8, 0.9, 1.0, 1.1 });

    SparseVector sv = new SparseVector(11, new int[] { 0, 1, 2, 4, 5, 6, 7, 10 }, new double[] { 1.0, 2.0, 6.0, 2.0, 3.0, 1.0, 1.0, 3.0 });
    Tuple2<DenseMatrix, DenseMatrix> re = LdaUtil.getTopicDistributionMethod(sv, expElogbeta, alpha, gammad, 5);

    // Same tolerance as before (the original wrote it as 10e-4, i.e. 1e-3). Using
    // assertEquals with a delta reports the expected and actual values on failure.
    double tolerance = 1e-3;
    Assert.assertEquals(1.6055989357674745, re.f0.get(3, 0), tolerance);
    Assert.assertEquals(0.39534340684397445, re.f1.get(2, 2), tolerance);
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseMatrix(com.alibaba.alink.common.linalg.DenseMatrix) Test(org.junit.Test)

Example 30 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class LdaUtilTest method getTopicDistributionMethodTest2.

/**
 * Smoke test for {@code LdaUtil.getTopicDistributionMethod} with a transposed {@code lambda}.
 *
 * <p>The 11 x 5 {@code lambda} matrix is transposed before being passed to
 * {@code LdaUtil.expDirichletExpectation}, and that result is transposed back. The method
 * under test is then invoked with a uniform {@code alpha} (0.2 for every entry) and an
 * 11-dimensional sparse vector.
 *
 * <p>NOTE(review): no reference values are available for this input, so the test only checks
 * that both returned matrices are present. Add numeric expectations once they are known.
 */
@Test
public void getTopicDistributionMethodTest2() {
    int row = 11;
    int col = 5;
    double[] temp = new double[] { 0.8936825549031158, 0.9650683744577933, 1.1760851442955271, 0.889011463028263, 1.0355502890838704, 1.1720254142865503, 0.8496512959061578, 1.1564109073902848, 0.8528198328651976, 1.072261907065107, 1.0112487630821958, 1.0288027427394206, 1.1256918577237478, 1.0641131417250107, 0.9830788207753957, 0.9519235842178695, 1.0531103642783968, 1.0846663792488604, 0.9317316401779444, 0.9816247167440154, 0.953061129524052, 0.8836097897537777, 0.8539728772760822, 1.109432137460693, 0.9801693423689286, 0.9385725168762017, 1.009886079821316, 0.9741390218380398, 0.8734624459614093, 0.8548583255850564, 0.8934120594879987, 1.0200469492393616, 0.9461610896051537, 1.1912819895664948, 0.9650275833536232, 0.9312815665885328, 0.984681817963758, 1.1412711858668625, 1.1159082714127344, 1.0219124026668207, 1.1052645047308647, 1.1380919062139254, 0.9684793634316371, 1.023922805813918, 1.0777999541431174, 0.8730213177341947, 1.0353598060502658, 1.047104264664753, 1.1284793487722498, 0.8898021261569816, 1.1634869627283706, 0.817874601150865, 1.0424867867765728, 1.167773175905418, 0.915224402643435 };
    DenseMatrix lambda = new DenseMatrix(row, col, temp, false).transpose();
    DenseMatrix expElogbeta = LdaUtil.expDirichletExpectation(lambda).transpose();
    DenseMatrix alpha = new DenseMatrix(5, 1, new double[] { 0.2, 0.2, 0.2, 0.2, 0.2 });
    DenseMatrix gammad = new DenseMatrix(5, 1, new double[] { 0.7, 0.8, 0.9, 1.0, 1.1 });

    SparseVector sv = new SparseVector(11, new int[] { 0, 1, 3, 6, 8, 9, 10 }, new double[] { 2.0, 1.0, 3.0, 5.0, 2.0, 2.0, 9.0 });
    Tuple2<DenseMatrix, DenseMatrix> re = LdaUtil.getTopicDistributionMethod(sv, expElogbeta, alpha, gammad, 5);

    // Previously the result was only printed, so the test could never fail on a bad result.
    Assert.assertNotNull(re);
    Assert.assertNotNull(re.f0);
    Assert.assertNotNull(re.f1);
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector) DenseMatrix(com.alibaba.alink.common.linalg.DenseMatrix) Test(org.junit.Test)

Aggregations

SparseVector (com.alibaba.alink.common.linalg.SparseVector): 125, Test (org.junit.Test): 63, DenseVector (com.alibaba.alink.common.linalg.DenseVector): 60, Params (org.apache.flink.ml.api.misc.param.Params): 45, Row (org.apache.flink.types.Row): 45, Vector (com.alibaba.alink.common.linalg.Vector): 40, TableSchema (org.apache.flink.table.api.TableSchema): 27, ArrayList (java.util.ArrayList): 21, TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 15, HashMap (java.util.HashMap): 12, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 12, List (java.util.List): 11, DenseMatrix (com.alibaba.alink.common.linalg.DenseMatrix): 10, MTable (com.alibaba.alink.common.MTable): 7, BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary): 6, CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp): 6, Map (java.util.Map): 6, MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 5, VectorAssemblerParams (com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams): 5, OneHotPredictParams (com.alibaba.alink.params.feature.OneHotPredictParams): 5