Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
From class AkSourceSinkTest, method testBatchSource.
@Test
public void testBatchSource() throws Exception {
    // Read two .ak files from the test directory and print the first 4 rows of each.
    BatchOperator data1 = new AkSourceBatchOp().setFilePath(new File(path, "af1").getAbsolutePath());
    BatchOperator data5 = new AkSourceBatchOp().setFilePath(new File(path, "ad2").getAbsolutePath());
    data1.lazyPrint(4);
    data5.lazyPrint(4);
    // lazyPrint is deferred; the job actually runs here.
    BatchOperator.execute();
}
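Note that lazyPrint only registers the output; nothing runs until BatchOperator.execute() submits the job. A minimal sketch of the write-then-read round trip this test relies on, assuming an in-memory source and a scratch path (both hypothetical, not from the test above):

// Hypothetical setup: write an .ak file so AkSourceBatchOp has something to read.
MemSourceBatchOp mem = new MemSourceBatchOp(
    new Object[][] {{1L, "a"}, {2L, "b"}},
    new String[] {"id", "val"});
mem.link(new AkSinkBatchOp().setFilePath("/tmp/demo.ak").setOverwriteSink(true));
BatchOperator.execute();
// Read the file back; printing is again deferred until execute().
new AkSourceBatchOp().setFilePath("/tmp/demo.ak").lazyPrint(4);
BatchOperator.execute();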
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
From class ZipFileSourceSinkTest, method testBatchSourceSinkSingleFile.
@Category(DbTest.class)
@Test
public void testBatchSourceSinkSingleFile() throws Exception {
    String filePath = path + "/file1.zip";
    // Write the test data into a single zipped .ak file, overwriting any earlier run.
    data.link(new AkSinkBatchOp().setFilePath(filePath).setOverwriteSink(true));
    BatchOperator.execute();
    // Read it back and check that all 6 rows survived the round trip.
    BatchOperator source = new AkSourceBatchOp().setFilePath(filePath);
    Assert.assertEquals(6, source.count());
}
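Because count() collects a result, it triggers job execution on its own; no explicit BatchOperator.execute() is needed for the assertion. The same .ak file can also be read on the stream side; a sketch, assuming AkSourceStreamOp accepts the same path:

// Hypothetical streaming read of the same zipped .ak file.
StreamOperator<?> streamSource = new AkSourceStreamOp().setFilePath(filePath);
streamSource.print();
StreamOperator.execute();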
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
From class PipelineSaveAndLoadTest, method test2.
@Test
public void test2() throws Exception {
    String model_filename = "/tmp/model2.csv";
    CsvSourceBatchOp source = new CsvSourceBatchOp()
        .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string")
        .setFilePath("https://alink-test-data.oss-cn-hangzhou.aliyuncs.com/iris.csv");
    QuantileDiscretizerTrainBatchOp train = new QuantileDiscretizerTrainBatchOp()
        .setNumBuckets(2)
        .setSelectedCols("petal_length")
        .linkFrom(source);
    // Save the trained discretizer's model data (AK format, despite the .csv suffix).
    train.link(new AkSinkBatchOp().setFilePath(model_filename).setOverwriteSink(true));
    BatchOperator.execute();
    // Save pipeline model data to file.
    String pipelineModelFilename = "/tmp/model23424.csv";
    QuantileDiscretizer stage1 = new QuantileDiscretizer().setNumBuckets(2).setSelectedCols("sepal_length");
    Binarizer stage2 = new Binarizer().setSelectedCol("petal_width").setThreshold(1.);
    // Reload the saved model data and wrap it in a model stage.
    AkSourceBatchOp modelData = new AkSourceBatchOp().setFilePath(model_filename);
    QuantileDiscretizerModel stage3 = new QuantileDiscretizerModel().setSelectedCols("petal_length").setModelData(modelData);
    PipelineModel prevPipelineModel = new Pipeline(stage1, stage2, stage3).fit(source);
    prevPipelineModel.save(pipelineModelFilename, true);
    BatchOperator.execute();
}
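The saved pipeline model can be read back and applied without refitting; a sketch, assuming PipelineModel.load accepts the same path string:

// Hypothetical reload of the pipeline model saved above.
PipelineModel loaded = PipelineModel.load(pipelineModelFilename);
loaded.transform(source).lazyPrint(4);
BatchOperator.execute();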
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
From class PipelineModel, method load.
@Deprecated
public static PipelineModel load(FilePath filePath, Long mlEnvId) {
    // Read the schema and the meta row from the AK file's header.
    Tuple2<TableSchema, Row> schemaAndMeta = ModelExporterUtils.loadMetaFromAkFile(filePath);
    // Rebuild the stage descriptions and pipeline parameters from the meta row.
    Tuple2<StageNode[], Params> stagesAndParams =
        ModelExporterUtils.deserializePipelineStagesAndParamsFromMeta(schemaAndMeta.f1, schemaAndMeta.f0);
    PipelineModel pipelineModel = new PipelineModel(stagesAndParams.f1);
    // Fill each stage with its model data, read back from the same AK file.
    pipelineModel.setTransformers(
        ModelExporterUtils.<TransformerBase<?>>fillPipelineStages(
            new AkSourceBatchOp().setFilePath(filePath).setMLEnvironmentId(mlEnvId),
            stagesAndParams.f0,
            schemaAndMeta.f0
        ).toArray(new TransformerBase<?>[0]));
    return pipelineModel;
}
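The loader thus reads everything from one AK file: schema and meta from the header, then the per-stage model rows via AkSourceBatchOp. A caller-side sketch for this deprecated overload, with a hypothetical path and the default ML environment id:

PipelineModel model = PipelineModel.load(
    new FilePath("/tmp/pipeline_model.ak"),          // hypothetical path
    MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID); // default environment id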
Use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.
From class Chap23, method c_2.
static void c_2() throws Exception {
    if (!new File(DATA_DIR + TRAIN_FILE).exists()) {
        ArrayList<Row> trainRows = new ArrayList<>();
        ArrayList<Row> testRows = new ArrayList<>();
        // Collect the raw text files, one row per document, labeled "pos" or "neg".
        for (String label : new String[] {"pos", "neg"}) {
            File subfolder = new File(ORIGIN_DATA_DIR + "train" + File.separator + label);
            for (File f : subfolder.listFiles()) {
                trainRows.add(Row.of(label, readFileContent(f)));
            }
        }
        for (String label : new String[] {"pos", "neg"}) {
            File subfolder = new File(ORIGIN_DATA_DIR + "test" + File.separator + label);
            for (File f : subfolder.listFiles()) {
                testRows.add(Row.of(label, readFileContent(f)));
            }
        }
        // Cache the rows as .ak files so later runs can skip the raw-file scan.
        new MemSourceBatchOp(trainRows, COL_NAMES).link(new AkSinkBatchOp().setFilePath(DATA_DIR + TRAIN_FILE));
        new MemSourceBatchOp(testRows, COL_NAMES).link(new AkSinkBatchOp().setFilePath(DATA_DIR + TEST_FILE));
        BatchOperator.execute();
    }
    AkSourceBatchOp train_set = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp test_set = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);
    train_set.lazyPrint(2);
    // Bag-of-words features from a learned vocabulary, then logistic regression.
    new Pipeline()
        .add(new RegexTokenizer().setPattern("\\W+").setSelectedCol(TXT_COL_NAME))
        .add(new DocCountVectorizer().setFeatureType("WORD_COUNT").setSelectedCol(TXT_COL_NAME)
            .setOutputCol(VECTOR_COL_NAME).enableLazyPrintTransformData(1))
        .add(new LogisticRegression().setMaxIter(30).setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME).setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME))
        .fit(train_set)
        .transform(test_set)
        .link(new EvalBinaryClassBatchOp().setPositiveLabelValueString("pos")
            .setLabelCol(LABEL_COL_NAME).setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("DocCountVectorizer"));
    BatchOperator.execute();
    // Same pipeline with hashed features: no vocabulary to learn or store.
    new Pipeline()
        .add(new RegexTokenizer().setPattern("\\W+").setSelectedCol(TXT_COL_NAME))
        .add(new DocHashCountVectorizer().setFeatureType("WORD_COUNT").setSelectedCol(TXT_COL_NAME)
            .setOutputCol(VECTOR_COL_NAME).enableLazyPrintTransformData(1))
        .add(new LogisticRegression().setMaxIter(30).setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME).setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME))
        .fit(train_set)
        .transform(test_set)
        .link(new EvalBinaryClassBatchOp().setPositiveLabelValueString("pos")
            .setLabelCol(LABEL_COL_NAME).setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("DocHashCountVectorizer"));
    BatchOperator.execute();
}
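The two pipelines differ only in the vectorizer: DocCountVectorizer learns an explicit vocabulary from the training corpus, while DocHashCountVectorizer hashes tokens into a fixed-size space, avoiding the dictionary at the cost of possible hash collisions. If the hashed space needs to be sized explicitly, a sketch, assuming a numFeatures parameter on DocHashCountVectorizer:

new DocHashCountVectorizer()
    .setFeatureType("WORD_COUNT")
    .setSelectedCol(TXT_COL_NAME)
    .setOutputCol(VECTOR_COL_NAME)
    .setNumFeatures(1 << 18)  // assumed parameter: cap the hashed vector dimension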