Search in sources :

Example 1 with VectorSummarizerBatchOp

use of com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp in project Alink by alibaba.

the class Chap07 method c_4_1.

/**
 * Prints a vector summary of the assembled feature vector column as loaded, and again after
 * applying each of three vector scalers (standard, min-max and max-abs).
 *
 * <p>Every summary is requested through {@code lazyPrintVectorSummary}; one
 * {@code BatchOperator.execute()} call is issued at the very end.
 *
 * @throws Exception propagated from the operators or from {@code BatchOperator.execute()}
 */
static void c_4_1() throws Exception {
    // CSV input followed by assembling the feature columns into one vector column.
    BatchOperator<?> csvInput = new CsvSourceBatchOp()
        .setFilePath(DATA_DIR + ORIGIN_FILE)
        .setSchemaStr(SCHEMA_STRING);
    VectorAssemblerBatchOp assembler = new VectorAssemblerBatchOp()
        .setSelectedCols(FEATURE_COL_NAMES)
        .setOutputCol(VECTOR_COL_NAME)
        .setReservedCols(LABEL_COL_NAME);
    BatchOperator<?> source = csvInput.link(assembler);

    // Summary of the unscaled vectors.
    BatchOperator<?> originSummary = new VectorSummarizerBatchOp()
        .setSelectedCol(VECTOR_COL_NAME)
        .lazyPrintVectorSummary("< Origin data >");
    source.link(originSummary);

    // Standard scaling: fit on the source, transform the same source, then summarize.
    BatchOperator<?> standardScaled = new VectorStandardScaler()
        .setSelectedCol(VECTOR_COL_NAME)
        .fit(source)
        .transform(source);
    BatchOperator<?> standardSummary = new VectorSummarizerBatchOp()
        .setSelectedCol(VECTOR_COL_NAME)
        .lazyPrintVectorSummary("< after Vector Standard Scale >");
    standardScaled.link(standardSummary);

    // Min-max scaling.
    BatchOperator<?> minMaxScaled = new VectorMinMaxScaler()
        .setSelectedCol(VECTOR_COL_NAME)
        .fit(source)
        .transform(source);
    BatchOperator<?> minMaxSummary = new VectorSummarizerBatchOp()
        .setSelectedCol(VECTOR_COL_NAME)
        .lazyPrintVectorSummary("< after Vector MinMax Scale >");
    minMaxScaled.link(minMaxSummary);

    // Max-abs scaling.
    BatchOperator<?> maxAbsScaled = new VectorMaxAbsScaler()
        .setSelectedCol(VECTOR_COL_NAME)
        .fit(source)
        .transform(source);
    BatchOperator<?> maxAbsSummary = new VectorSummarizerBatchOp()
        .setSelectedCol(VECTOR_COL_NAME)
        .lazyPrintVectorSummary("< after Vector MaxAbs Scale >");
    maxAbsScaled.link(maxAbsSummary);

    BatchOperator.execute();
}
Also used : VectorMinMaxScaler(com.alibaba.alink.pipeline.dataproc.vector.VectorMinMaxScaler) VectorAssemblerBatchOp(com.alibaba.alink.operator.batch.dataproc.vector.VectorAssemblerBatchOp) VectorStandardScaler(com.alibaba.alink.pipeline.dataproc.vector.VectorStandardScaler) VectorSummarizerBatchOp(com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp) VectorMaxAbsScaler(com.alibaba.alink.pipeline.dataproc.vector.VectorMaxAbsScaler) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp)

Example 2 with VectorSummarizerBatchOp

use of com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp in project Alink by alibaba.

the class Chap13 method c_1.

/**
 * Prepares the MNIST data files if needed and prints summaries of the training data.
 *
 * <p>When the file at {@code DATA_DIR + SPARSE_TRAIN_FILE} does not exist, the four MNIST gz
 * archives are read and written out as four files (sparse/dense x train/test), executing after
 * each conversion. The boolean constructor argument of {@code MnistGzFileSourceBatchOp} is
 * presumably the sparse-vector flag (true for the SPARSE_* targets) - TODO confirm.
 *
 * <p>Afterwards the dense and sparse training files are each sampled and summarized, and the
 * sparse training file is additionally grouped by label with a per-label count.
 *
 * @throws Exception propagated from the operators or from {@code BatchOperator.execute()}
 */
static void c_1() throws Exception {
    File sparseTrainFile = new File(DATA_DIR + SPARSE_TRAIN_FILE);
    if (!sparseTrainFile.exists()) {
        String trainImages = DATA_DIR + "train-images-idx3-ubyte.gz";
        String trainLabels = DATA_DIR + "train-labels-idx1-ubyte.gz";
        String testImages = DATA_DIR + "t10k-images-idx3-ubyte.gz";
        String testLabels = DATA_DIR + "t10k-labels-idx1-ubyte.gz";

        // Training set -> SPARSE_TRAIN_FILE
        new MnistGzFileSourceBatchOp(trainImages, trainLabels, true)
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE));
        BatchOperator.execute();

        // Test set -> SPARSE_TEST_FILE
        new MnistGzFileSourceBatchOp(testImages, testLabels, true)
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + SPARSE_TEST_FILE));
        BatchOperator.execute();

        // Training set -> DENSE_TRAIN_FILE
        new MnistGzFileSourceBatchOp(trainImages, trainLabels, false)
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + DENSE_TRAIN_FILE));
        BatchOperator.execute();

        // Test set -> DENSE_TEST_FILE
        new MnistGzFileSourceBatchOp(testImages, testLabels, false)
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + DENSE_TEST_FILE));
        BatchOperator.execute();
    }

    // Dense training data: print one row, then a vector summary.
    BatchOperator<?> denseTrain = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + DENSE_TRAIN_FILE)
        .lazyPrint(1, "MNIST data");
    BatchOperator<?> denseSummary = new VectorSummarizerBatchOp()
        .setSelectedCol(VECTOR_COL_NAME)
        .lazyPrintVectorSummary();
    denseTrain.link(denseSummary);

    // Sparse training data: same treatment.
    BatchOperator<?> sparseTrain = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + SPARSE_TRAIN_FILE)
        .lazyPrint(1, "MNIST data");
    BatchOperator<?> sparseSummary = new VectorSummarizerBatchOp()
        .setSelectedCol(VECTOR_COL_NAME)
        .lazyPrintVectorSummary();
    sparseTrain.link(sparseSummary);

    // Table statistics plus a per-label row count ordered by that count.
    BatchOperator<?> sparseForStats = new AkSourceBatchOp()
        .setFilePath(DATA_DIR + SPARSE_TRAIN_FILE)
        .lazyPrintStatistics();
    sparseForStats
        .groupBy(LABEL_COL_NAME, LABEL_COL_NAME + ", COUNT(*) AS cnt")
        .orderBy("cnt", 100)
        .lazyPrint(-1);

    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) VectorSummarizerBatchOp(com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) File(java.io.File)

Example 3 with VectorSummarizerBatchOp

use of com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp in project Alink by alibaba.

the class PCATest method testDense.

/**
 * Fits a PCA (k = 3, calculation type "CORR") on five rows of dense vector strings, transforms
 * the same rows, and registers a check on the vector summary of the "pred" column.
 *
 * <p>The check asserts that the absolute value of the first entry of the summary's sum is
 * within 10e-8 of 3.4416913763379853E-15. It is registered through
 * {@code lazyCollectVectorSummary}; this method does not itself call
 * {@code BatchOperator.execute()}.
 */
private void testDense() {
    Object[][] rows = {
        { 1, "0.1 0.2 0.3 0.4" },
        { 2, "0.2 0.1 0.2 0.6" },
        { 3, "0.2 0.3 0.5 0.4" },
        { 4, "0.3 0.1 0.3 0.7" },
        { 5, "0.4 0.2 0.4 0.4" }
    };
    String[] schema = { "id", "vec" };
    MemSourceBatchOp input = new MemSourceBatchOp(rows, schema);

    PCA estimator = new PCA()
        .setK(3)
        .setCalculationType("CORR")
        .setPredictionCol("pred")
        .setReservedCols("id")
        .setVectorCol("vec");
    estimator.enableLazyPrintModelInfo();
    BatchOperator<?> projected = estimator.fit(input).transform(input);

    VectorSummarizerBatchOp summarizer = new VectorSummarizerBatchOp().setSelectedCol("pred");
    summarizer.linkFrom(projected);

    Consumer<BaseVectorSummary> sumCheck = stats ->
        Assert.assertEquals(3.4416913763379853E-15, Math.abs(stats.sum().get(0)), 10e-8);
    summarizer.lazyCollectVectorSummary(sumCheck);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) VectorSummarizerBatchOp(com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp)

Example 4 with VectorSummarizerBatchOp

use of com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp in project Alink by alibaba.

the class PCATest method testSparse.

/**
 * Fits a PCA (k = 3, calculation type "CORR") on five rows of "index:value" vector strings,
 * transforms the same rows, and registers a check on the vector summary of the "pred" column.
 *
 * <p>The check asserts that the absolute value of the first entry of the summary's sum is
 * within 10e-8 of 3.4416913763379853E-15. It is registered through
 * {@code lazyCollectVectorSummary}; this method does not itself call
 * {@code BatchOperator.execute()}.
 */
private void testSparse() {
    Object[][] rows = {
        { 1, "0:0.1 1:0.2 2:0.3 3:0.4" },
        { 2, "0:0.2 1:0.1 2:0.2 3:0.6" },
        { 3, "0:0.2 1:0.3 2:0.5 3:0.4" },
        { 4, "0:0.3 1:0.1 2:0.3 3:0.7" },
        { 5, "0:0.4 1:0.2 2:0.4 3:0.4" }
    };
    String[] schema = { "id", "vec" };
    MemSourceBatchOp input = new MemSourceBatchOp(rows, schema);

    PCA estimator = new PCA()
        .setK(3)
        .setCalculationType("CORR")
        .setPredictionCol("pred")
        .setReservedCols("id")
        .setVectorCol("vec");
    estimator.enableLazyPrintModelInfo();
    BatchOperator<?> projected = estimator.fit(input).transform(input);

    VectorSummarizerBatchOp summarizer = new VectorSummarizerBatchOp().setSelectedCol("pred");
    summarizer.linkFrom(projected);

    Consumer<BaseVectorSummary> sumCheck = stats ->
        Assert.assertEquals(3.4416913763379853E-15, Math.abs(stats.sum().get(0)), 10e-8);
    summarizer.lazyCollectVectorSummary(sumCheck);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) VectorSummarizerBatchOp(com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp)

Example 5 with VectorSummarizerBatchOp

use of com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp in project Alink by alibaba.

the class PCATest method testTable.

/**
 * Fits a PCA (k = 3, calculation type "CORR") on five rows with numeric columns f0..f3,
 * transforms the same rows, and registers a check on the vector summary of the "pred" column.
 *
 * <p>The check asserts that the absolute value of the first entry of the summary's sum is
 * within 10e-8 of 3.1086244689504383E-15. It is registered through
 * {@code lazyCollectVectorSummary}; this method does not itself call
 * {@code BatchOperator.execute()}.
 *
 * @throws Exception declared by the signature; propagated from the operators used
 */
public void testTable() throws Exception {
    Object[][] rows = {
        { 1, 0.1, 0.2, 0.3, 0.4 },
        { 2, 0.2, 0.1, 0.2, 0.6 },
        { 3, 0.2, 0.3, 0.5, 0.4 },
        { 4, 0.3, 0.1, 0.3, 0.7 },
        { 5, 0.4, 0.2, 0.4, 0.4 }
    };
    String[] schema = { "id", "f0", "f1", "f2", "f3" };
    MemSourceBatchOp input = new MemSourceBatchOp(rows, schema);

    PCA estimator = new PCA()
        .setK(3)
        .setCalculationType("CORR")
        .setPredictionCol("pred")
        .setReservedCols("id")
        .setSelectedCols("f0", "f1", "f2", "f3");
    estimator.enableLazyPrintModelInfo();
    BatchOperator<?> projected = estimator.fit(input).transform(input);

    VectorSummarizerBatchOp summarizer = new VectorSummarizerBatchOp().setSelectedCol("pred");
    summarizer.linkFrom(projected);

    Consumer<BaseVectorSummary> sumCheck = stats ->
        Assert.assertEquals(3.1086244689504383E-15, Math.abs(stats.sum().get(0)), 10e-8);
    summarizer.lazyCollectVectorSummary(sumCheck);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) BaseVectorSummary(com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary) VectorSummarizerBatchOp(com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp)

Aggregations

VectorSummarizerBatchOp (com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp): 5
MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 3
BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary): 3
VectorAssemblerBatchOp (com.alibaba.alink.operator.batch.dataproc.vector.VectorAssemblerBatchOp): 1
AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp): 1
AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 1
CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp): 1
VectorMaxAbsScaler (com.alibaba.alink.pipeline.dataproc.vector.VectorMaxAbsScaler): 1
VectorMinMaxScaler (com.alibaba.alink.pipeline.dataproc.vector.VectorMinMaxScaler): 1
VectorStandardScaler (com.alibaba.alink.pipeline.dataproc.vector.VectorStandardScaler): 1
File (java.io.File): 1