use of com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary in project Alink by alibaba.
the class StatisticsHelperTest method dataSetSummary.
/**
 * Checks the vector summary computed by {@code StatisticsHelper.summary} over the batch
 * returned by {@code getDenseBatch()}.
 *
 * <p>Each row is turned into a vector by parsing its field at index 1, the resulting
 * data set is summarized, and the first collected summary is compared against fixed
 * expected statistics.
 *
 * @throws Exception propagated from the calls made while building and collecting the summary
 */
@Test
public void dataSetSummary() throws Exception {
    // Wildcard instead of a raw type, so the generic types of getDataSet()/map() are kept.
    BatchOperator<?> data = getDenseBatch();

    // Extracts the vector stored in field 1 of every row.
    MapFunction<Row, Vector> toVector = new MapFunction<Row, Vector>() {
        private static final long serialVersionUID = -6512822331768742553L;

        @Override
        public Vector map(Row in) throws Exception {
            return VectorUtil.getVector(in.getField(1));
        }
    };

    DataSet<BaseVectorSummary> dataSet = StatisticsHelper.summary(data.getDataSet().map(toVector));
    BaseVectorSummary summary = dataSet.collect().get(0);

    // JUnit's contract is assertEquals(expected, actual[, delta]); keeping that order makes
    // failure messages report the expected and the actual value the right way round.
    assertEquals(3, summary.vectorSize());
    assertEquals(4, summary.count());
    assertEquals(4.0, summary.max(2), 10e-4);
    assertEquals(0.0, summary.min(1), 10e-4);
    assertEquals(1.25, summary.mean(2), 10e-4);
    assertEquals(8.9167, summary.variance(2), 10e-4);
    assertEquals(2.9861, summary.standardDeviation(2), 10e-4);
    assertEquals(11.0, summary.normL1(2), 10e-4);
    assertEquals(5.7446, summary.normL2(2), 10e-4);
}
use of com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary in project Alink by alibaba.
the class VectorSummarizerBatchOpTest method test.
/**
 * Runs {@code VectorSummarizerBatchOp} over three two-dimensional vectors held in an
 * in-memory source and checks the collected summary of the component at index 0
 * (values 1.0, -1.0 and 4.0) together with the vector size and row count.
 */
@Test
public void test() {
    Row[] testArray = new Row[] { Row.of("1.0 2.0"), Row.of("-1.0 -3.0"), Row.of("4.0 2.0") };
    String selectedColName = "vec";
    String[] colNames = new String[] { selectedColName };
    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

    // Reuse the column-name variable so the source schema and the selected column cannot drift apart.
    VectorSummarizerBatchOp summarizer = new VectorSummarizerBatchOp().setSelectedCol(selectedColName);
    summarizer.linkFrom(source);
    BaseVectorSummary srt = summarizer.collectVectorSummary();

    // Prints the summary for manual inspection; println(Object) also invokes toString() on it.
    System.out.println(srt);

    // JUnit's contract is assertEquals(expected, actual[, delta]); keeping that order makes
    // failure messages report the expected and the actual value the right way round.
    Assert.assertEquals(2, srt.vectorSize());
    Assert.assertEquals(3, srt.count());
    Assert.assertEquals(4.0, srt.max(0), 10e-4);
    Assert.assertEquals(-1.0, srt.min(0), 10e-4);
    Assert.assertEquals(1.3333333333333333, srt.mean(0), 10e-4);
    Assert.assertEquals(6.333333333333334, srt.variance(0), 10e-4);
    Assert.assertEquals(2.5166114784235836, srt.standardDeviation(0), 10e-4);
    Assert.assertEquals(6.0, srt.normL1(0), 10e-4);
    Assert.assertEquals(4.242640687119285, srt.normL2(0), 10e-4);
}
use of com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary in project Alink by alibaba.
the class PCATest method testSparse.
/**
 * Fits a 3-component PCA (calculation type "CORR") on five vectors given in
 * {@code index:value} string form, transforms the same data, and registers a lazy check
 * on the summary of the "pred" column.
 *
 * <p>The check compares the absolute value of the first component of the summary's sum
 * against a constant of about 3.4e-15 with a tolerance of 10e-8.
 *
 * <p>This method is private and carries no test annotation, so it has to be invoked from
 * another method. NOTE(review): the registered callback presumably only runs once the
 * caller triggers execution of the batch job - confirm against the caller.
 */
private void testSparse() {
    String[] columns = new String[] { "id", "vec" };
    Object[][] rows = new Object[][] {
        { 1, "0:0.1 1:0.2 2:0.3 3:0.4" },
        { 2, "0:0.2 1:0.1 2:0.2 3:0.6" },
        { 3, "0:0.2 1:0.3 2:0.5 3:0.4" },
        { 4, "0:0.3 1:0.1 2:0.3 3:0.7" },
        { 5, "0:0.4 1:0.2 2:0.4 3:0.4" }
    };
    MemSourceBatchOp input = new MemSourceBatchOp(rows, columns);

    PCA estimator = new PCA()
        .setK(3)
        .setCalculationType("CORR")
        .setPredictionCol("pred")
        .setReservedCols("id")
        .setVectorCol("vec");
    estimator.enableLazyPrintModelInfo();

    PCAModel fitted = estimator.fit(input);
    BatchOperator<?> projected = fitted.transform(input);

    // Callback that inspects the first component of the summed prediction vectors.
    Consumer<BaseVectorSummary> firstComponentSumCheck = new Consumer<BaseVectorSummary>() {
        @Override
        public void accept(BaseVectorSummary summary) {
            double absFirstSum = Math.abs(summary.sum().get(0));
            Assert.assertEquals(3.4416913763379853E-15, absFirstSum, 10e-8);
        }
    };

    VectorSummarizerBatchOp predSummarizer = new VectorSummarizerBatchOp().setSelectedCol("pred");
    predSummarizer.linkFrom(projected);
    predSummarizer.lazyCollectVectorSummary(firstComponentSumCheck);
}
use of com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary in project Alink by alibaba.
the class PCATest method testTable.
/**
 * Fits a 3-component PCA (calculation type "CORR") on the numeric columns
 * {@code f0..f3} of a five-row in-memory table, transforms the same data, and registers
 * a lazy check on the summary of the "pred" column.
 *
 * <p>The check compares the absolute value of the first component of the summary's sum
 * against a constant of about 3.1e-15 with a tolerance of 10e-8.
 *
 * <p>NOTE(review): no test annotation is visible on this method, and the registered
 * callback presumably only runs once the caller triggers execution of the batch job -
 * confirm against the caller.
 *
 * @throws Exception declared by the original signature; propagated from the calls made here
 */
public void testTable() throws Exception {
    String[] columns = new String[] { "id", "f0", "f1", "f2", "f3" };
    Object[][] rows = new Object[][] {
        { 1, 0.1, 0.2, 0.3, 0.4 },
        { 2, 0.2, 0.1, 0.2, 0.6 },
        { 3, 0.2, 0.3, 0.5, 0.4 },
        { 4, 0.3, 0.1, 0.3, 0.7 },
        { 5, 0.4, 0.2, 0.4, 0.4 }
    };
    MemSourceBatchOp input = new MemSourceBatchOp(rows, columns);

    PCA estimator = new PCA()
        .setK(3)
        .setCalculationType("CORR")
        .setPredictionCol("pred")
        .setReservedCols("id")
        .setSelectedCols("f0", "f1", "f2", "f3");
    estimator.enableLazyPrintModelInfo();

    PCAModel fitted = estimator.fit(input);
    BatchOperator<?> projected = fitted.transform(input);

    // Callback that inspects the first component of the summed prediction vectors.
    Consumer<BaseVectorSummary> firstComponentSumCheck = new Consumer<BaseVectorSummary>() {
        @Override
        public void accept(BaseVectorSummary summary) {
            double absFirstSum = Math.abs(summary.sum().get(0));
            Assert.assertEquals(3.1086244689504383E-15, absFirstSum, 10e-8);
        }
    };

    VectorSummarizerBatchOp predSummarizer = new VectorSummarizerBatchOp().setSelectedCol("pred");
    predSummarizer.linkFrom(projected);
    predSummarizer.lazyCollectVectorSummary(firstComponentSumCheck);
}
Aggregations