Use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by Alibaba.
From the class Chap13, method c_1.
/**
 * Converts the raw MNIST gz archives into four Alink .ak files (sparse/dense x train/test)
 * and then prints summaries of the training data.
 *
 * <p>Each target file is checked individually. Guarding all four conversions with the
 * existence of the sparse training file alone (the first file written) would leave the
 * remaining files missing forever after an interrupted run, and the dense summary below
 * would then fail to read its input.
 *
 * @throws Exception if any of the triggered batch jobs fails
 */
static void c_1() throws Exception {
    final String trainImages = DATA_DIR + "train-images-idx3-ubyte.gz";
    final String trainLabels = DATA_DIR + "train-labels-idx1-ubyte.gz";
    final String testImages = DATA_DIR + "t10k-images-idx3-ubyte.gz";
    final String testLabels = DATA_DIR + "t10k-labels-idx1-ubyte.gz";

    // Same order as before: sparse train, sparse test, dense train, dense test.
    convertMnistIfAbsent(trainImages, trainLabels, true, DATA_DIR + SPARSE_TRAIN_FILE);
    convertMnistIfAbsent(testImages, testLabels, true, DATA_DIR + SPARSE_TEST_FILE);
    convertMnistIfAbsent(trainImages, trainLabels, false, DATA_DIR + DENSE_TRAIN_FILE);
    convertMnistIfAbsent(testImages, testLabels, false, DATA_DIR + DENSE_TEST_FILE);

    // Dense training data: a sample print titled "MNIST data" plus a vector summary.
    new AkSourceBatchOp()
        .setFilePath(DATA_DIR + DENSE_TRAIN_FILE)
        .lazyPrint(1, "MNIST data")
        .link(new VectorSummarizerBatchOp().setSelectedCol(VECTOR_COL_NAME).lazyPrintVectorSummary());

    // The same report for the sparse training data.
    new AkSourceBatchOp()
        .setFilePath(DATA_DIR + SPARSE_TRAIN_FILE)
        .lazyPrint(1, "MNIST data")
        .link(new VectorSummarizerBatchOp().setSelectedCol(VECTOR_COL_NAME).lazyPrintVectorSummary());

    // Table statistics, then a per-label row count ordered by "cnt".
    new AkSourceBatchOp()
        .setFilePath(DATA_DIR + SPARSE_TRAIN_FILE)
        .lazyPrintStatistics()
        .groupBy(LABEL_COL_NAME, LABEL_COL_NAME + ", COUNT(*) AS cnt")
        .orderBy("cnt", 100)
        .lazyPrint(-1);

    // The lazy prints above are only produced once the batch job is executed.
    BatchOperator.execute();
}

/**
 * Writes one MNIST gz image/label pair to an .ak file unless that file already exists.
 *
 * @param imagesGzPath path of the gz image archive
 * @param labelsGzPath path of the gz label archive
 * @param sparseFlag   third constructor argument of the MNIST source; callers pass
 *                     {@code true} for the sparse files and {@code false} for the dense ones
 * @param akFilePath   target .ak file
 * @throws Exception if the conversion job fails
 */
private static void convertMnistIfAbsent(String imagesGzPath, String labelsGzPath,
                                         boolean sparseFlag, String akFilePath) throws Exception {
    if (new File(akFilePath).exists()) {
        return;
    }
    new MnistGzFileSourceBatchOp(imagesGzPath, labelsGzPath, sparseFlag)
        .link(new AkSinkBatchOp().setFilePath(akFilePath));
    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by Alibaba.
From the class Chap18, method c_2.
/**
 * Trains an initial KMeans model when none is stored yet, then evaluates its predictions
 * on the sparse training data twice: once through the batch predictor and once through
 * the stream predictor (whose output is written to a temporary file and read back).
 *
 * @throws Exception if any batch or stream job fails
 */
static void c_2() throws Exception {
    final String trainPath = DATA_DIR + SPARSE_TRAIN_FILE;
    final String initModelPath = DATA_DIR + INIT_MODEL_FILE;
    final String streamOutputPath = DATA_DIR + TEMP_STREAM_FILE;

    AkSourceBatchOp batchSource = new AkSourceBatchOp().setFilePath(trainPath);
    AkSourceStreamOp streamSource = new AkSourceStreamOp().setFilePath(trainPath);

    // Train and store the initial model only when it is not on disk yet.
    boolean modelMissing = !new File(initModelPath).exists();
    if (modelMissing) {
        batchSource
            .sampleWithSize(100)
            .link(new KMeansTrainBatchOp().setVectorCol(VECTOR_COL_NAME).setK(10))
            .link(new AkSinkBatchOp().setFilePath(initModelPath));
        BatchOperator.execute();
    }

    AkSourceBatchOp initModel = new AkSourceBatchOp().setFilePath(initModelPath);

    // Batch prediction with the stored model, followed by cluster evaluation.
    new KMeansPredictBatchOp()
        .setPredictionCol(PREDICTION_COL_NAME)
        .linkFrom(initModel, batchSource)
        .link(
            new EvalClusterBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintMetrics("Batch Prediction")
        );
    BatchOperator.execute();

    // Stream prediction with the same model; results go to a temp file (overwritten each run).
    streamSource
        .link(new KMeansPredictStreamOp(initModel).setPredictionCol(PREDICTION_COL_NAME))
        .link(new AkSinkStreamOp().setFilePath(streamOutputPath).setOverwriteSink(true));
    StreamOperator.execute();

    // Read the stream output back as a batch and evaluate it the same way.
    new AkSourceBatchOp()
        .setFilePath(streamOutputPath)
        .link(
            new EvalClusterBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintMetrics("Stream Prediction")
        );
    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by Alibaba.
From the class Chap19, method c_3.
/**
 * Trains a PCA model (k = 39) on the sparse training data and stores it, then runs the
 * same KMeans configuration on the original vectors and on the PCA-transformed vectors,
 * printing cluster metrics and the elapsed time for each run.
 *
 * @throws Exception if any batch job fails
 */
static void c_3() throws Exception {
    final String pcaModelPath = DATA_DIR + PCA_MODEL_FILE;

    AkSourceBatchOp trainData = new AkSourceBatchOp().setFilePath(DATA_DIR + SPARSE_TRAIN_FILE);

    // Train PCA, print its model info, and store the model (overwriting any previous one).
    trainData
        .link(
            new PcaTrainBatchOp()
                .setK(39)
                .setCalculationType(CalculationType.COV)
                .setVectorCol(VECTOR_COL_NAME)
                .lazyPrintModelInfo()
        )
        .link(new AkSinkBatchOp().setFilePath(pcaModelPath).setOverwriteSink(true));
    BatchOperator.execute();

    // The prediction column is the vector column itself, so the later KMeans run reads
    // the PCA output through the same column name.
    BatchOperator<?> pcaResult = new PcaPredictBatchOp()
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(VECTOR_COL_NAME)
        .linkFrom(new AkSourceBatchOp().setFilePath(pcaModelPath), trainData);

    Stopwatch timer = new Stopwatch();
    KMeans clusterer = new KMeans()
        .setK(10)
        .setVectorCol(VECTOR_COL_NAME)
        .setPredictionCol(PREDICTION_COL_NAME);

    runTimedKMeans(timer, clusterer, trainData, "KMeans");
    runTimedKMeans(timer, clusterer, pcaResult, "KMeans + PCA");
}

/**
 * Fits and applies the given KMeans on {@code data}, evaluates the clustering, executes
 * the batch job, and prints the stopwatch's elapsed time span.
 *
 * @param timer        stopwatch that is reset and started before the run, stopped after it
 * @param clusterer    configured KMeans estimator
 * @param data         input used for both fitting and transforming
 * @param metricsTitle title passed to {@code lazyPrintMetrics}
 * @throws Exception if the batch job fails
 */
private static void runTimedKMeans(Stopwatch timer, KMeans clusterer, BatchOperator<?> data,
                                   String metricsTitle) throws Exception {
    timer.reset();
    timer.start();
    clusterer
        .fit(data)
        .transform(data)
        .link(
            new EvalClusterBatchOp()
                .setVectorCol(VECTOR_COL_NAME)
                .setPredictionCol(PREDICTION_COL_NAME)
                .setLabelCol(LABEL_COL_NAME)
                .lazyPrintMetrics(metricsTitle)
        );
    BatchOperator.execute();
    timer.stop();
    System.out.println(timer.getElapsedTimeSpan());
}
Use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by Alibaba.
From the class AkSourceSinkTest, method testBatchSink.
/**
 * Links the Iris batch data to two .ak sinks under {@code path} ("af1", and "ad2" with
 * {@code setNumFiles(2)}), both with overwrite enabled, and executes the batch job.
 *
 * @throws Exception if the batch job fails
 */
public void testBatchSink() throws Exception {
    // Wildcard instead of the raw type, so generic checks are kept without warnings.
    BatchOperator<?> data = Iris.getBatchData();

    data.link(
        new AkSinkBatchOp()
            .setFilePath(new File(path, "af1").getAbsolutePath())
            .setOverwriteSink(true)
    );
    data.link(
        new AkSinkBatchOp()
            .setFilePath(new File(path, "ad2").getAbsolutePath())
            .setNumFiles(2)
            .setOverwriteSink(true)
    );

    // Both sinks are attached lazily; a single execute runs them.
    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by Alibaba.
From the class ZipFileSourceSinkTest, method testBatchSourceSinkSingleFile.
/**
 * Writes {@code data} to a single .ak file named "file1.zip" under {@code path}, reads it
 * back, and checks that 6 rows are returned.
 *
 * @throws Exception if a batch job fails
 */
@Category(DbTest.class)
@Test
public void testBatchSourceSinkSingleFile() throws Exception {
    String filePath = path + "/file1.zip";

    data.link(new AkSinkBatchOp().setFilePath(filePath).setOverwriteSink(true));
    BatchOperator.execute();

    // Wildcard instead of the raw type.
    BatchOperator<?> source = new AkSourceBatchOp().setFilePath(filePath);

    // JUnit expects (expected, actual); the swapped order would report a failure backwards.
    Assert.assertEquals(6L, source.count());
}
Aggregations