Example use of com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp in the Alink project by Alibaba.
Class Chap07, method c_4_1.
/**
 * Prints a vector summary of the assembled feature vector as read from the
 * CSV file, and again after each of three vector scalers (standard, min-max,
 * max-abs) has been fitted on and applied to the same data.
 *
 * <p>All summaries are registered lazily; nothing is computed until the final
 * {@code BatchOperator.execute()} call.
 *
 * @throws Exception propagated from the underlying batch execution
 */
static void c_4_1() throws Exception {
    // Read the raw CSV with the declared schema.
    BatchOperator<?> rawCsv = new CsvSourceBatchOp()
        .setFilePath(DATA_DIR + ORIGIN_FILE)
        .setSchemaStr(SCHEMA_STRING);

    // Pack the feature columns into one vector column, keeping only the label beside it.
    BatchOperator<?> source = rawCsv.link(
        new VectorAssemblerBatchOp()
            .setSelectedCols(FEATURE_COL_NAMES)
            .setOutputCol(VECTOR_COL_NAME)
            .setReservedCols(LABEL_COL_NAME)
    );

    // Baseline: summary of the untouched vectors.
    source.link(
        new VectorSummarizerBatchOp()
            .setSelectedCol(VECTOR_COL_NAME)
            .lazyPrintVectorSummary("< Origin data >")
    );

    // Standard scaler: fit on the source, transform the same source, summarize.
    BatchOperator<?> standardScaled = new VectorStandardScaler()
        .setSelectedCol(VECTOR_COL_NAME)
        .fit(source)
        .transform(source);
    standardScaled.link(
        new VectorSummarizerBatchOp()
            .setSelectedCol(VECTOR_COL_NAME)
            .lazyPrintVectorSummary("< after Vector Standard Scale >")
    );

    // Min-max scaler, same fit/transform/summarize sequence.
    BatchOperator<?> minMaxScaled = new VectorMinMaxScaler()
        .setSelectedCol(VECTOR_COL_NAME)
        .fit(source)
        .transform(source);
    minMaxScaled.link(
        new VectorSummarizerBatchOp()
            .setSelectedCol(VECTOR_COL_NAME)
            .lazyPrintVectorSummary("< after Vector MinMax Scale >")
    );

    // Max-abs scaler, same fit/transform/summarize sequence.
    BatchOperator<?> maxAbsScaled = new VectorMaxAbsScaler()
        .setSelectedCol(VECTOR_COL_NAME)
        .fit(source)
        .transform(source);
    maxAbsScaled.link(
        new VectorSummarizerBatchOp()
            .setSelectedCol(VECTOR_COL_NAME)
            .lazyPrintVectorSummary("< after Vector MaxAbs Scale >")
    );

    // Trigger the job; the four lazy summaries are printed as a result.
    BatchOperator.execute();
}
Example use of com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp in the Alink project by Alibaba.
Class Chap13, method c_1.
/**
 * Ensures the four MNIST .ak files (sparse/dense x train/test) exist under
 * {@code DATA_DIR}, then prints a sample row, vector summaries and a per-label
 * row count for the training data.
 *
 * <p>Each target file is checked and generated independently. Previously a
 * single check on the sparse training file guarded all four conversions, so a
 * partially populated data directory either skipped files that were still
 * missing or re-ran sinks against files that were already present.
 *
 * @throws Exception propagated from the underlying batch execution
 */
static void c_1() throws Exception {
    // NOTE(review): the boolean argument appears to select sparse (true) vs dense (false)
    // vectors, judging by the target file names - confirm against MnistGzFileSourceBatchOp.
    convertMnistIfAbsent(
        DATA_DIR + "train-images-idx3-ubyte.gz",
        DATA_DIR + "train-labels-idx1-ubyte.gz",
        true,
        DATA_DIR + SPARSE_TRAIN_FILE
    );
    convertMnistIfAbsent(
        DATA_DIR + "t10k-images-idx3-ubyte.gz",
        DATA_DIR + "t10k-labels-idx1-ubyte.gz",
        true,
        DATA_DIR + SPARSE_TEST_FILE
    );
    convertMnistIfAbsent(
        DATA_DIR + "train-images-idx3-ubyte.gz",
        DATA_DIR + "train-labels-idx1-ubyte.gz",
        false,
        DATA_DIR + DENSE_TRAIN_FILE
    );
    convertMnistIfAbsent(
        DATA_DIR + "t10k-images-idx3-ubyte.gz",
        DATA_DIR + "t10k-labels-idx1-ubyte.gz",
        false,
        DATA_DIR + DENSE_TEST_FILE
    );

    // Dense training data: print one sample row, then the vector summary.
    new AkSourceBatchOp()
        .setFilePath(DATA_DIR + DENSE_TRAIN_FILE)
        .lazyPrint(1, "MNIST data")
        .link(new VectorSummarizerBatchOp().setSelectedCol(VECTOR_COL_NAME).lazyPrintVectorSummary());

    // Sparse training data: same report.
    new AkSourceBatchOp()
        .setFilePath(DATA_DIR + SPARSE_TRAIN_FILE)
        .lazyPrint(1, "MNIST data")
        .link(new VectorSummarizerBatchOp().setSelectedCol(VECTOR_COL_NAME).lazyPrintVectorSummary());

    // Table statistics, then the number of rows per label ordered by that count.
    // NOTE(review): 100 is presumably the row limit of orderBy and -1 presumably means
    // "print all rows" - confirm against the BatchOperator API.
    new AkSourceBatchOp()
        .setFilePath(DATA_DIR + SPARSE_TRAIN_FILE)
        .lazyPrintStatistics()
        .groupBy(LABEL_COL_NAME, LABEL_COL_NAME + ", COUNT(*) AS cnt")
        .orderBy("cnt", 100)
        .lazyPrint(-1);

    BatchOperator.execute();
}

/**
 * Converts one pair of MNIST gz files into an .ak file, unless the target
 * file already exists.
 *
 * @param imagesGzPath path of the gzip-compressed image file
 * @param labelsGzPath path of the gzip-compressed label file
 * @param flag         third constructor argument of {@code MnistGzFileSourceBatchOp},
 *                     passed through unchanged
 * @param targetPath   path of the .ak file to create
 * @throws Exception propagated from the underlying batch execution
 */
private static void convertMnistIfAbsent(
    String imagesGzPath, String labelsGzPath, boolean flag, String targetPath) throws Exception {
    if (new File(targetPath).exists()) {
        return;
    }
    new MnistGzFileSourceBatchOp(imagesGzPath, labelsGzPath, flag)
        .link(new AkSinkBatchOp().setFilePath(targetPath));
    // Run each conversion as its own job, as the original code did.
    BatchOperator.execute();
}
Example use of com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp in the Alink project by Alibaba.
Class PCATest, method testDense.
/**
 * Fits a 3-component correlation-based PCA on five dense 4-dimensional
 * vectors, transforms the same data, and registers a lazy check that the
 * first component of the summed predictions is (numerically) zero.
 *
 * <p>Only lazy callbacks are registered here; this method does not itself
 * trigger execution.
 */
private void testDense() {
    final String[] columns = { "id", "vec" };
    final Object[][] rows = {
        { 1, "0.1 0.2 0.3 0.4" },
        { 2, "0.2 0.1 0.2 0.6" },
        { 3, "0.2 0.3 0.5 0.4" },
        { 4, "0.3 0.1 0.3 0.7" },
        { 5, "0.4 0.2 0.4 0.4" }
    };
    MemSourceBatchOp source = new MemSourceBatchOp(rows, columns);

    PCA pca = new PCA()
        .setK(3)
        .setCalculationType("CORR")
        .setPredictionCol("pred")
        .setReservedCols("id")
        .setVectorCol("vec");
    pca.enableLazyPrintModelInfo();

    PCAModel model = pca.fit(source);
    BatchOperator<?> predict = model.transform(source);

    // Expected magnitude of the first summed component; the tolerance 10e-8 (= 1e-7)
    // is far larger, so the assertion effectively checks "close to zero".
    final double expectedAbsSum = 3.4416913763379853E-15;
    final double tolerance = 10e-8;

    new VectorSummarizerBatchOp()
        .setSelectedCol("pred")
        .linkFrom(predict)
        .lazyCollectVectorSummary(
            (BaseVectorSummary summary) ->
                Assert.assertEquals(expectedAbsSum, Math.abs(summary.sum().get(0)), tolerance)
        );
}
Example use of com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp in the Alink project by Alibaba.
Class PCATest, method testSparse.
/**
 * Fits a 3-component correlation-based PCA on five vectors written in
 * {@code index:value} form, transforms the same data, and registers a lazy
 * check that the first component of the summed predictions is (numerically)
 * zero.
 *
 * <p>Only lazy callbacks are registered here; this method does not itself
 * trigger execution.
 */
private void testSparse() {
    final String[] columns = { "id", "vec" };
    final Object[][] rows = {
        { 1, "0:0.1 1:0.2 2:0.3 3:0.4" },
        { 2, "0:0.2 1:0.1 2:0.2 3:0.6" },
        { 3, "0:0.2 1:0.3 2:0.5 3:0.4" },
        { 4, "0:0.3 1:0.1 2:0.3 3:0.7" },
        { 5, "0:0.4 1:0.2 2:0.4 3:0.4" }
    };
    MemSourceBatchOp source = new MemSourceBatchOp(rows, columns);

    PCA pca = new PCA()
        .setK(3)
        .setCalculationType("CORR")
        .setPredictionCol("pred")
        .setReservedCols("id")
        .setVectorCol("vec");
    pca.enableLazyPrintModelInfo();

    PCAModel model = pca.fit(source);
    BatchOperator<?> predict = model.transform(source);

    // Expected magnitude of the first summed component; the tolerance 10e-8 (= 1e-7)
    // is far larger, so the assertion effectively checks "close to zero".
    final double expectedAbsSum = 3.4416913763379853E-15;
    final double tolerance = 10e-8;

    new VectorSummarizerBatchOp()
        .setSelectedCol("pred")
        .linkFrom(predict)
        .lazyCollectVectorSummary(
            (BaseVectorSummary summary) ->
                Assert.assertEquals(expectedAbsSum, Math.abs(summary.sum().get(0)), tolerance)
        );
}
Example use of com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp in the Alink project by Alibaba.
Class PCATest, method testTable.
/**
 * Fits a 3-component correlation-based PCA on five rows with four numeric
 * feature columns ({@code f0}..{@code f3}), transforms the same data, and
 * registers a lazy check that the first component of the summed predictions
 * is (numerically) zero.
 *
 * <p>Only lazy callbacks are registered here; this method does not itself
 * trigger execution.
 *
 * @throws Exception declared by the original signature
 */
public void testTable() throws Exception {
    final String[] columns = { "id", "f0", "f1", "f2", "f3" };
    final Object[][] rows = {
        { 1, 0.1, 0.2, 0.3, 0.4 },
        { 2, 0.2, 0.1, 0.2, 0.6 },
        { 3, 0.2, 0.3, 0.5, 0.4 },
        { 4, 0.3, 0.1, 0.3, 0.7 },
        { 5, 0.4, 0.2, 0.4, 0.4 }
    };
    MemSourceBatchOp source = new MemSourceBatchOp(rows, columns);

    PCA pca = new PCA()
        .setK(3)
        .setCalculationType("CORR")
        .setPredictionCol("pred")
        .setReservedCols("id")
        .setSelectedCols("f0", "f1", "f2", "f3");
    pca.enableLazyPrintModelInfo();

    PCAModel model = pca.fit(source);
    BatchOperator<?> predict = model.transform(source);

    // Expected magnitude of the first summed component; the tolerance 10e-8 (= 1e-7)
    // is far larger, so the assertion effectively checks "close to zero".
    final double expectedAbsSum = 3.1086244689504383E-15;
    final double tolerance = 10e-8;

    new VectorSummarizerBatchOp()
        .setSelectedCol("pred")
        .linkFrom(predict)
        .lazyCollectVectorSummary(
            (BaseVectorSummary summary) ->
                Assert.assertEquals(expectedAbsSum, Math.abs(summary.sum().get(0)), tolerance)
        );
}
Aggregations