Search in sources :

Example 26 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class AkSourceSinkTest method testBatchSource.

/**
 * Reads the two Ak files "af1" and "ad2" located under {@code path} and prints
 * the first four rows of each.
 *
 * <p>Both prints are registered lazily and are only produced by the single
 * {@link BatchOperator#execute()} call at the end.
 *
 * @throws Exception if building or executing the batch job fails
 */
@Test
public void testBatchSource() throws Exception {
    // Declared with the concrete operator type instead of the raw BatchOperator type.
    AkSourceBatchOp firstSource = new AkSourceBatchOp()
        .setFilePath(new File(path, "af1").getAbsolutePath());
    AkSourceBatchOp secondSource = new AkSourceBatchOp()
        .setFilePath(new File(path, "ad2").getAbsolutePath());

    firstSource.lazyPrint(4);
    secondSource.lazyPrint(4);

    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) File(java.io.File) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Test(org.junit.Test)

Example 27 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class ZipFileSourceSinkTest method testBatchSourceSinkSingleFile.

/**
 * Writes {@code data} to a single file through {@link AkSinkBatchOp}, reads it
 * back through {@link AkSourceBatchOp} and checks that six rows come back.
 *
 * @throws Exception if writing or reading the file fails
 */
@Category(DbTest.class)
@Test
public void testBatchSourceSinkSingleFile() throws Exception {
    String filePath = path + "/file1.zip";

    // The sink only runs once the batch job is executed.
    data.link(new AkSinkBatchOp().setFilePath(filePath).setOverwriteSink(true));
    BatchOperator.execute();

    AkSourceBatchOp source = new AkSourceBatchOp().setFilePath(filePath);

    // JUnit expects (expected, actual); the swapped order produced misleading failure messages.
    Assert.assertEquals(6, source.count());
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) BatchOperator(com.alibaba.alink.operator.batch.BatchOperator) Category(org.junit.experimental.categories.Category) Test(org.junit.Test) DbTest(com.alibaba.alink.testutil.categories.DbTest)

Example 28 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class PipelineSaveAndLoadTest method test2.

/**
 * Saves a standalone quantile-discretizer model and then a fitted pipeline model.
 *
 * <p>Step one trains a two-bucket discretizer on {@code petal_length} of the iris
 * data and sinks the model to {@code /tmp/model2.csv}. Step two builds a pipeline
 * from a discretizer, a binarizer and a discretizer model backed by the file from
 * step one, fits it on the same data and saves it to {@code /tmp/model23424.csv}.
 *
 * @throws Exception if reading the data, training or writing any model fails
 */
@Test
public void test2() throws Exception {
    final String discretizerModelPath = "/tmp/model2.csv";
    final String irisSchema =
        "sepal_length double, sepal_width double, petal_length double, petal_width double, category string";
    final String irisUrl = "https://alink-test-data.oss-cn-hangzhou.aliyuncs.com/iris.csv";

    CsvSourceBatchOp irisSource = new CsvSourceBatchOp()
        .setSchemaStr(irisSchema)
        .setFilePath(irisUrl);

    // Step one: train the discretizer and persist its model data.
    QuantileDiscretizerTrainBatchOp trainer = new QuantileDiscretizerTrainBatchOp()
        .setNumBuckets(2)
        .setSelectedCols("petal_length")
        .linkFrom(irisSource);
    AkSinkBatchOp modelSink = new AkSinkBatchOp()
        .setFilePath(discretizerModelPath)
        .setOverwriteSink(true);
    trainer.link(modelSink);
    BatchOperator.execute();

    // Step two: assemble the pipeline, reusing the model written above, and save it.
    final String savedPipelinePath = "/tmp/model23424.csv";
    QuantileDiscretizer sepalDiscretizer = new QuantileDiscretizer()
        .setNumBuckets(2)
        .setSelectedCols("sepal_length");
    Binarizer petalWidthBinarizer = new Binarizer()
        .setSelectedCol("petal_width")
        .setThreshold(1.);
    AkSourceBatchOp savedModelData = new AkSourceBatchOp().setFilePath(discretizerModelPath);
    QuantileDiscretizerModel petalDiscretizerModel = new QuantileDiscretizerModel()
        .setSelectedCols("petal_length")
        .setModelData(savedModelData);

    Pipeline pipeline = new Pipeline(sepalDiscretizer, petalWidthBinarizer, petalDiscretizerModel);
    PipelineModel fittedModel = pipeline.fit(irisSource);
    // NOTE(review): the second argument is presumably an overwrite flag - confirm against PipelineModel.save.
    fittedModel.save(savedPipelinePath, true);
    BatchOperator.execute();
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) QuantileDiscretizerModel(com.alibaba.alink.pipeline.feature.QuantileDiscretizerModel) QuantileDiscretizerTrainBatchOp(com.alibaba.alink.operator.batch.feature.QuantileDiscretizerTrainBatchOp) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) Binarizer(com.alibaba.alink.pipeline.feature.Binarizer) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp) QuantileDiscretizer(com.alibaba.alink.pipeline.feature.QuantileDiscretizer) Test(org.junit.Test)

Example 29 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class PipelineModel method load.

/**
 * Rebuilds a {@link PipelineModel} from an Ak file.
 *
 * <p>The schema and meta row are read from the file, the stage nodes and pipeline
 * params are deserialized from them, and the stages are then filled using an
 * {@link AkSourceBatchOp} that reads the same file in the given ML environment.
 *
 * @param filePath location of the saved pipeline model
 * @param mlEnvId  id of the ML environment assigned to the source operator
 * @return the pipeline model with its transformers set
 */
@Deprecated
public static PipelineModel load(FilePath filePath, Long mlEnvId) {
    Tuple2<TableSchema, Row> schemaAndMeta = ModelExporterUtils.loadMetaFromAkFile(filePath);
    TableSchema schema = schemaAndMeta.f0;
    Row meta = schemaAndMeta.f1;

    Tuple2<StageNode[], Params> stagesAndParams =
        ModelExporterUtils.deserializePipelineStagesAndParamsFromMeta(meta, schema);
    StageNode[] stageNodes = stagesAndParams.f0;

    PipelineModel loaded = new PipelineModel(stagesAndParams.f1);

    AkSourceBatchOp modelSource = new AkSourceBatchOp()
        .setFilePath(filePath)
        .setMLEnvironmentId(mlEnvId);
    TransformerBase<?>[] transformers = ModelExporterUtils
        .<TransformerBase<?>>fillPipelineStages(modelSource, stageNodes, schema)
        .toArray(new TransformerBase<?>[0]);
    loaded.setTransformers(transformers);

    return loaded;
}
Also used : AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) TableSchema(org.apache.flink.table.api.TableSchema) MapperParams(com.alibaba.alink.params.mapper.MapperParams) ModelStreamScanParams(com.alibaba.alink.params.ModelStreamScanParams) Params(org.apache.flink.ml.api.misc.param.Params) Row(org.apache.flink.types.Row)

Example 30 with AkSourceBatchOp

use of com.alibaba.alink.operator.batch.source.AkSourceBatchOp in project Alink by alibaba.

the class Chap23 method c_2.

/**
 * Compares two text vectorizers on a binary "pos"/"neg" classification task.
 *
 * <p>If the training Ak file does not exist yet, the raw files under
 * {@code ORIGIN_DATA_DIR} are read and written out as training and test Ak files.
 * Two pipelines are then fitted on the training set and evaluated on the test set.
 * Each is a regex tokenizer, a vectorizer and a logistic regression, and they
 * differ only in using {@link DocCountVectorizer} or {@link DocHashCountVectorizer}.
 *
 * @throws Exception if reading the raw files or running a batch job fails
 */
static void c_2() throws Exception {
    if (!new File(DATA_DIR + TRAIN_FILE).exists()) {
        ArrayList<Row> trainRows = readLabeledRows("train");
        ArrayList<Row> testRows = readLabeledRows("test");

        new MemSourceBatchOp(trainRows, COL_NAMES).link(new AkSinkBatchOp().setFilePath(DATA_DIR + TRAIN_FILE));
        new MemSourceBatchOp(testRows, COL_NAMES).link(new AkSinkBatchOp().setFilePath(DATA_DIR + TEST_FILE));
        BatchOperator.execute();
    }

    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);
    trainSet.lazyPrint(2);

    // Pipeline 1: vocabulary-based word counts.
    new Pipeline()
        .add(new RegexTokenizer().setPattern("\\W+").setSelectedCol(TXT_COL_NAME))
        .add(new DocCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol(VECTOR_COL_NAME)
            .enableLazyPrintTransformData(1))
        .add(new LogisticRegression()
            .setMaxIter(30)
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME))
        .fit(trainSet)
        .transform(testSet)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("pos")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("DocCountVectorizer"));
    BatchOperator.execute();

    // Pipeline 2: identical except for the hashing-based vectorizer.
    new Pipeline()
        .add(new RegexTokenizer().setPattern("\\W+").setSelectedCol(TXT_COL_NAME))
        .add(new DocHashCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol(VECTOR_COL_NAME)
            .enableLazyPrintTransformData(1))
        .add(new LogisticRegression()
            .setMaxIter(30)
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME))
        .fit(trainSet)
        .transform(testSet)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("pos")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("DocHashCountVectorizer"));
    BatchOperator.execute();
}

/**
 * Reads every file under {@code ORIGIN_DATA_DIR + split + "/pos"} and
 * {@code ".../neg"} into (label, content) rows, "pos" first.
 *
 * @param split sub-directory of the original data set, "train" or "test"
 * @return one row per file, holding the label and the file content
 * @throws IllegalStateException if a label directory is missing or cannot be listed
 * @throws Exception if reading a file fails
 */
private static ArrayList<Row> readLabeledRows(String split) throws Exception {
    ArrayList<Row> rows = new ArrayList<>();
    for (String label : new String[] { "pos", "neg" }) {
        File subfolder = new File(ORIGIN_DATA_DIR + split + File.separator + label);
        // listFiles() returns null (not an empty array) when the path is not a readable directory.
        File[] files = subfolder.listFiles();
        if (files == null) {
            throw new IllegalStateException("Cannot list files in directory: " + subfolder.getAbsolutePath());
        }
        for (File f : files) {
            rows.add(Row.of(label, readFileContent(f)));
        }
    }
    return rows;
}
Also used : ArrayList(java.util.ArrayList) DocCountVectorizer(com.alibaba.alink.pipeline.nlp.DocCountVectorizer) Pipeline(com.alibaba.alink.pipeline.Pipeline) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp) MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) RegexTokenizer(com.alibaba.alink.pipeline.nlp.RegexTokenizer) Row(org.apache.flink.types.Row) AkSinkBatchOp(com.alibaba.alink.operator.batch.sink.AkSinkBatchOp) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression) File(java.io.File) DocHashCountVectorizer(com.alibaba.alink.pipeline.nlp.DocHashCountVectorizer)

Aggregations

AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 66; AkSinkBatchOp (com.alibaba.alink.operator.batch.sink.AkSinkBatchOp): 20; EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp): 18; File (java.io.File): 16; EvalMultiClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp): 10; Pipeline (com.alibaba.alink.pipeline.Pipeline): 10; LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression): 10; EvalClusterBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalClusterBatchOp): 9; Stopwatch (com.alibaba.alink.common.utils.Stopwatch): 8; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 7; Row (org.apache.flink.types.Row): 6; Test (org.junit.Test): 6; BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 5; CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp): 5; PipelineModel (com.alibaba.alink.pipeline.PipelineModel): 5; ArrayList (java.util.ArrayList): 4; PluginDownloader (com.alibaba.alink.common.io.plugin.PluginDownloader): 3; RegisterKey (com.alibaba.alink.common.io.plugin.RegisterKey): 3; LogisticRegressionPredictBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionPredictBatchOp): 3; LogisticRegressionTrainBatchOp (com.alibaba.alink.operator.batch.classification.LogisticRegressionTrainBatchOp): 3