Search in sources :

Example 6 with AkSinkBatchOp

use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.

the class Chap13 method c_1.

/**
 * Converts the gzipped MNIST sources into Ak files when needed, then prints summaries of the
 * training data.
 *
 * <p>Each of the four Ak outputs (sparse/dense, train/test) is guarded by its own existence
 * check, so a missing file is generated even when the sparse training file is already present.
 * Every conversion is run by its own {@code BatchOperator.execute()} call.
 *
 * @throws Exception propagated from the underlying batch operators
 */
static void c_1() throws Exception {
    // The third constructor argument is true for the SPARSE_* targets and false for the DENSE_* ones.
    if (!new File(DATA_DIR + SPARSE_TRAIN_FILE).exists()) {
        new MnistGzFileSourceBatchOp(DATA_DIR + "train-images-idx3-ubyte.gz", DATA_DIR + "train-labels-idx1-ubyte.gz", true)
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE));
        BatchOperator.execute();
    }
    if (!new File(DATA_DIR + SPARSE_TEST_FILE).exists()) {
        new MnistGzFileSourceBatchOp(DATA_DIR + "t10k-images-idx3-ubyte.gz", DATA_DIR + "t10k-labels-idx1-ubyte.gz", true)
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + SPARSE_TEST_FILE));
        BatchOperator.execute();
    }
    if (!new File(DATA_DIR + DENSE_TRAIN_FILE).exists()) {
        new MnistGzFileSourceBatchOp(DATA_DIR + "train-images-idx3-ubyte.gz", DATA_DIR + "train-labels-idx1-ubyte.gz", false)
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + DENSE_TRAIN_FILE));
        BatchOperator.execute();
    }
    if (!new File(DATA_DIR + DENSE_TEST_FILE).exists()) {
        new MnistGzFileSourceBatchOp(DATA_DIR + "t10k-images-idx3-ubyte.gz", DATA_DIR + "t10k-labels-idx1-ubyte.gz", false)
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + DENSE_TEST_FILE));
        BatchOperator.execute();
    }

    // Dense training data: print one row and a vector summary.
    new AkSourceBatchOp()
        .setFilePath(DATA_DIR + DENSE_TRAIN_FILE)
        .lazyPrint(1, "MNIST data")
        .link(new VectorSummarizerBatchOp().setSelectedCol(VECTOR_COL_NAME).lazyPrintVectorSummary());

    // Sparse training data: same report as for the dense file.
    new AkSourceBatchOp()
        .setFilePath(DATA_DIR + SPARSE_TRAIN_FILE)
        .lazyPrint(1, "MNIST data")
        .link(new VectorSummarizerBatchOp().setSelectedCol(VECTOR_COL_NAME).lazyPrintVectorSummary());

    // Table statistics plus the number of rows per label, ordered by that count.
    new AkSourceBatchOp()
        .setFilePath(DATA_DIR + SPARSE_TRAIN_FILE)
        .lazyPrintStatistics()
        .groupBy(LABEL_COL_NAME, LABEL_COL_NAME + ", COUNT(*) AS cnt")
        .orderBy("cnt", 100)
        .lazyPrint(-1);

    // Triggers all the lazy print requests registered above.
    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) VectorSummarizerBatchOp(com.alibaba.alink.operator.batch.statistics.VectorSummarizerBatchOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) File(java.io.File)

Example 7 with AkSinkBatchOp

use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.

the class Chap18 method c_2.

/**
 * Evaluates KMeans clustering on the sparse training data in batch mode and in stream mode,
 * using an initial model that is trained from a 100-row sample if its file does not exist yet.
 *
 * <p>The stream predictions are written to a temporary Ak file and then read back in batch
 * mode for evaluation.
 *
 * @throws Exception propagated from the underlying batch/stream operators
 */
static void c_2() throws Exception {
    final String trainPath = DATA_DIR + SPARSE_TRAIN_FILE;
    final String initModelPath = DATA_DIR + INIT_MODEL_FILE;
    final String streamResultPath = DATA_DIR + TEMP_STREAM_FILE;

    AkSourceBatchOp batchSource = new AkSourceBatchOp().setFilePath(trainPath);
    AkSourceStreamOp streamSource = new AkSourceStreamOp().setFilePath(trainPath);

    // Train and persist the initial model only when no model file is present.
    File initModelFile = new File(initModelPath);
    if (!initModelFile.exists()) {
        batchSource
            .sampleWithSize(100)
            .link(new KMeansTrainBatchOp().setVectorCol(VECTOR_COL_NAME).setK(10))
            .link(new AkSinkBatchOp().setFilePath(initModelPath));
        BatchOperator.execute();
    }

    AkSourceBatchOp initModel = new AkSourceBatchOp().setFilePath(initModelPath);

    // Batch prediction and evaluation.
    new KMeansPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .linkFrom(initModel, batchSource)
        .link(
            new EvalClusterBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintMetrics("Batch Prediction"));
    BatchOperator.execute();

    // Stream prediction, written to a temporary file (overwriting any previous result).
    streamSource
        .link(new KMeansPredictStreamOp(initModel).setPredictionCol(PREDICTION_COL_NAME))
        .link(new AkSinkStreamOp().setFilePath(streamResultPath).setOverwriteSink(true));
    StreamOperator.execute();

    // Evaluate the stream output by reading the temporary file back as a batch source.
    new AkSourceBatchOp()
        .setFilePath(streamResultPath)
        .link(
            new EvalClusterBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintMetrics("Stream Prediction"));
    BatchOperator.execute();
}
Also used : KMeansPredictBatchOp(com.alibaba.alink.operator.batch.clustering.KMeansPredictBatchOp) AkSinkStreamOp(com.alibaba.alink.operator.stream.sink.AkSinkStreamOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) AkSourceStreamOp(com.alibaba.alink.operator.stream.source.AkSourceStreamOp) KMeansPredictStreamOp(com.alibaba.alink.operator.stream.clustering.KMeansPredictStreamOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) File(java.io.File) KMeansTrainBatchOp(com.alibaba.alink.operator.batch.clustering.KMeansTrainBatchOp) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 8 with AkSinkBatchOp

use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.

the class Chap19 method c_3.

/**
 * Trains a PCA model (k = 39) on the sparse training data, then runs KMeans (k = 10) on the
 * original vectors and on the PCA output, printing clustering metrics and the elapsed time
 * of each run.
 *
 * @throws Exception propagated from the underlying batch operators
 */
static void c_3() throws Exception {
    final String pcaModelPath = DATA_DIR + PCA_MODEL_FILE;

    AkSourceBatchOp trainData = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);

    // Train the PCA model, print its info and persist it (overwriting any previous model file).
    trainData
        .link(
            new PcaTrainBatchOp()
                .setK(39)
                .setCalculationType(CalculationType.COV)
                .setVectorCol(VECTOR_COL_NAME)
                .lazyPrintModelInfo())
        .link(new AkSinkBatchOp().setFilePath(pcaModelPath).setOverwriteSink(true));
    BatchOperator.execute();

    // The PCA prediction is written back into the vector column itself.
    BatchOperator<?> pcaResult = new PcaPredictBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(VECTOR_COL_NAME)
        .linkFrom(new AkSourceBatchOp().setFilePath(pcaModelPath), trainData);

    Stopwatch timer = new Stopwatch();
    KMeans clusterer = new KMeans()
        .setK(10)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME);

    // Run 1: KMeans on the original vectors.
    timer.reset();
    timer.start();
    clusterer
        .fit(trainData)
        .transform(trainData)
        .link(
            new EvalClusterBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintMetrics("KMeans"));
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());

    // Run 2: KMeans on the PCA output.
    timer.reset();
    timer.start();
    clusterer
        .fit(pcaResult)
        .transform(pcaResult)
        .link(
            new EvalClusterBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintMetrics("KMeans + PCA"));
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) KMeans(com.alibaba.alink.pipeline.clustering.KMeans) PcaPredictBatchOp(com.alibaba.alink.operator.batch.feature.PcaPredictBatchOp) PcaTrainBatchOp(com.alibaba.alink.operator.batch.feature.PcaTrainBatchOp) Stopwatch(com.alibaba.alink.common.utils.Stopwatch) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) EvalClusterBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)

Example 9 with AkSinkBatchOp

use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.

the class AkSourceSinkTest method testBatchSink.

/**
 * Writes the Iris batch data to two Ak sinks under {@code path}: "af1" with default settings
 * and "ad2" with {@code setNumFiles(2)}. Both sinks overwrite existing output.
 *
 * @throws Exception propagated from the batch job
 */
public void testBatchSink() throws Exception {
    // Wildcard type instead of the raw BatchOperator keeps generic type checking enabled.
    BatchOperator<?> data = Iris.getBatchData();

    String singleTarget = new File(path, "af1").getAbsolutePath();
    String splitTarget = new File(path, "ad2").getAbsolutePath();

    data.link(new AkSinkBatchOp().setFilePath(singleTarget).setOverwriteSink(true));
    data.link(new AkSinkBatchOp().setFilePath(splitTarget).setNumFiles(2).setOverwriteSink(true));

    // A single execute() runs both sinks linked above.
    BatchOperator.execute();
}
Also used : AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) File(java.io.File) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator)

Example 10 with AkSinkBatchOp

use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.

the class ZipFileSourceSinkTest method testBatchSourceSinkSingleFile.

/**
 * Round-trips {@code data} through an Ak sink/source pair targeting a single ".zip" path and
 * checks that 6 rows are read back.
 *
 * @throws Exception propagated from the batch job
 */
@Category(DbTest.class)
@Test
public void testBatchSourceSinkSingleFile() throws Exception {
    String filePath = path + "/file1.zip";

    data.link(new AkSinkBatchOp().setFilePath(filePath).setOverwriteSink(true));
    BatchOperator.execute();

    // Wildcard type instead of the raw BatchOperator keeps generic type checking enabled.
    BatchOperator<?> source = new AkSourceBatchOp().setFilePath(filePath);

    // JUnit's contract is assertEquals(expected, actual); this order keeps failure messages accurate.
    Assert.assertEquals(6, source.count());
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Category(org.junit.experimental.categories.Category) Test(org.junit.Test) DbTest(com.alibaba.alink.testutil.categories.DbTest)

Aggregations

AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp)27 AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp)20 File (java.io.File)15 CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp)6 MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp)5 BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)4 EvalClusterBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp)4 PipelineModel (com.alibaba.alink.pipeline.PipelineModel)3 LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression)3 FilePath (com.alibaba.alink.common.io.filesystem.FilePath)2 HadoopFileSystem (com.alibaba.alink.common.io.filesystem.HadoopFileSystem)2 OssFileSystem (com.alibaba.alink.common.io.filesystem.OssFileSystem)2 Stopwatch (com.alibaba.alink.common.utils.Stopwatch)2 LogisticRegressionTrainBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp)2 KMeansPredictBatchOp (com.alibaba.alink.operator.batch.clustering.KMeansPredictBatchOp)2 KMeansTrainBatchOp (com.alibaba.alink.operator.batch.clustering.KMeansTrainBatchOp)2 SplitBatchOp (com.alibaba.alink.operator.batch.dataproc.SplitBatchOp)2 EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)2 SegmentBatchOp (com.alibaba.alink.operator.batch.nlp.SegmentBatchOp)2 StopWordsRemoverBatchOp (com.alibaba.alink.operator.batch.nlp.StopWordsRemoverBatchOp)2