Search in sources :

Example 1 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

the class Chap07 method c_5.

/**
 * Runs a small missing-value imputation demo on an in-memory table.
 *
 * <p>The table has three columns ({@code col1}, {@code col2}, {@code col3}) and five rows,
 * the last of which is entirely {@code null}. Two {@link Imputer} stages are chained in a
 * {@link Pipeline}: the first targets {@code col1} with {@code Strategy.VALUE} and fill
 * value {@code "e"}; the second targets {@code col2} and {@code col3} with
 * {@code Strategy.MEAN}. The pipeline is fitted on the source and applied to that same
 * source, and the result is printed.
 *
 * @throws Exception propagated from the Alink operators
 */
static void c_5() throws Exception {
    String[] columnNames = new String[] { "col1", "col2", "col3" };
    Row[] inputRows = new Row[] {
        Row.of("a", 10.0, 100),
        Row.of("b", -2.5, 9),
        Row.of("c", 100.2, 1),
        Row.of("d", -99.9, 100),
        // fully-null row: every column of this row needs a fill value
        Row.of(null, null, null)
    };
    MemSourceBatchOp source = new MemSourceBatchOp(inputRows, columnNames);
    // NOTE(review): -1 presumably means "print all rows" -- confirm against lazyPrint's docs
    source.lazyPrint(-1, "< origin data >");

    Imputer constantFill = new Imputer()
        .setSelectedCols("col1")
        .setStrategy(Strategy.VALUE)
        .setFillValue("e");
    Imputer meanFill = new Imputer()
        .setSelectedCols("col2", "col3")
        .setStrategy(Strategy.MEAN);
    Pipeline pipeline = new Pipeline().add(constantFill).add(meanFill);

    pipeline.fit(source).transform(source).print();

    // Integer division, so this prints 52. 210 is the sum of the four non-null col3
    // values (100 + 9 + 1 + 100); presumably printed for comparison with the imputed
    // mean -- confirm intent.
    System.out.println(210 / 4);
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) Imputer(com.alibaba.alink.pipeline.dataproc.Imputer) Row(org.apache.flink.types.Row) Pipeline(com.alibaba.alink.pipeline.Pipeline)

Example 2 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

the class Chap14 method c_3.

/**
 * Fits the feature-engineering pipeline on the Avazu sample CSV and saves the fitted
 * model, unless a file already exists at {@code DATA_DIR + FEATURE_MODEL_FILE}.
 *
 * <p>The pipeline has two stages: a {@link StandardScaler} over
 * {@code NUMERICAL_COL_NAMES}, followed by a {@link FeatureHasher} over the categorical
 * and numerical columns that writes to {@code VEC_COL_NAME} with
 * {@code NUM_HASH_FEATURES} features.
 *
 * @throws Exception propagated from the Alink operators
 */
static void c_3() throws Exception {
    CsvSourceBatchOp trainBatchData = new CsvSourceBatchOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-small.csv")
        .setSchemaStr(SCHEMA_STRING);

    // set up the feature engineering pipeline
    StandardScaler scaler = new StandardScaler().setSelectedCols(NUMERICAL_COL_NAMES);
    FeatureHasher hasher = new FeatureHasher()
        .setSelectedCols(ArrayUtils.addAll(CATEGORY_COL_NAMES, NUMERICAL_COL_NAMES))
        .setCategoricalCols(CATEGORY_COL_NAMES)
        .setOutputCol(VEC_COL_NAME)
        .setNumFeatures(NUM_HASH_FEATURES);
    Pipeline featurePipeline = new Pipeline().add(scaler).add(hasher);

    // A model file that is already present is reused: nothing is fitted or saved.
    if (new File(DATA_DIR + FEATURE_MODEL_FILE).exists()) {
        return;
    }

    // fit and save the feature pipeline model
    featurePipeline.fit(trainBatchData).save(DATA_DIR + FEATURE_MODEL_FILE);
    // NOTE(review): presumably this runs the pending batch job so the save takes effect -- confirm
    BatchOperator.execute();
}
Also used : FeatureHasher(com.alibaba.alink.pipeline.feature.FeatureHasher) StandardScaler(com.alibaba.alink.pipeline.dataproc.StandardScaler) File(java.io.File) CsvSourceBatchOp(com.alibaba.alink.operator.batch.source.CsvSourceBatchOp) Pipeline(com.alibaba.alink.pipeline.Pipeline)

Example 3 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

the class PipelineCandidatesGrid method get.

/**
 * Builds the candidate pipeline for the given flat index.
 *
 * <p>Walking the dimensions from {@code dim - 1} down to {@code 0}, the running index is
 * divided by {@code counts[d]} to pick an entry from that dimension's value array, and the
 * remainder is carried to the next dimension. The picked (f0, f1, value) triples are
 * applied to a clone of the base pipeline via {@code updatePipelineParams}.
 *
 * @param index            flat index of the candidate to build
 * @param experienceScores not read by this implementation
 * @return the cloned, updated pipeline together with the parameter triples applied to it
 * @throws CloneNotSupportedException if cloning the base pipeline fails
 */
@Override
public Tuple2<Pipeline, List<Tuple3<Integer, ParamInfo, Object>>> get(int index, List<Double> experienceScores) throws CloneNotSupportedException {
    ArrayList<Tuple3<Integer, ParamInfo, Object>> chosenParams = new ArrayList<>();
    int remaining = index;
    for (int d = this.dim - 1; d >= 0; d--) {
        int count = this.counts[d];
        int pick = remaining / count;
        remaining %= count;
        Tuple3<Integer, ParamInfo, Object[]> item = this.items.get(d);
        chosenParams.add(new Tuple3<>(item.f0, item.f1, item.f2[pick]));
    }

    // Work on a copy so the base pipeline held by this object is not modified here.
    Pipeline candidate = this.pipeline.clone();
    updatePipelineParams(candidate, chosenParams);
    return Tuple2.of(candidate, chosenParams);
}
Also used : Tuple3(org.apache.flink.api.java.tuple.Tuple3) ArrayList(java.util.ArrayList) ParamInfo(org.apache.flink.ml.api.misc.param.ParamInfo) Pipeline(com.alibaba.alink.pipeline.Pipeline)

Example 4 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

the class LogisticRegTest method pipelineTestBatch.

/**
 * Chains three {@link LogisticRegression} stages in one {@link Pipeline} -- one using
 * feature columns, one using the {@code vec} vector column and one using the
 * {@code svec} vector column -- fits it on the data from {@code getData()}, transforms
 * that same data, and compares prediction columns against the {@code labels} column.
 */
@Test
public void pipelineTestBatch() {
    String yVar = "labels";
    String[] xVars = new String[] { "f0", "f1", "f2", "f3" };
    String vectorName = "vec";
    String svectorName = "svec";

    LogisticRegression lr = new LogisticRegression()
        .setLabelCol(yVar)
        .setFeatureCols(xVars)
        .setPredictionCol("lrpred")
        .enableLazyPrintModelInfo()
        .enableLazyPrintTrainInfo();
    LogisticRegression vectorLr = new LogisticRegression()
        .setLabelCol(yVar)
        .setVectorCol(vectorName)
        .setPredictionCol("vlrpred")
        .enableLazyPrintModelInfo()
        .enableLazyPrintTrainInfo();
    LogisticRegression sparseVectorLr = new LogisticRegression()
        .setLabelCol(yVar)
        .setVectorCol(svectorName)
        .setPredictionCol("svlrpred")
        .enableLazyPrintModelInfo()
        .enableLazyPrintTrainInfo();

    Pipeline plLr = new Pipeline().add(lr).add(vectorLr).add(sparseVectorLr);
    BatchOperator<?> trainData = getData();
    PipelineModel model = plLr.fit(trainData);

    String[] outputCols = new String[] { "labels", "lrpred", "vlrpred", "svlrpred" };
    BatchOperator<?> result = model.transform(trainData).select(outputCols);

    // Field 0 is the label; fields 1 and 2 are "lrpred" and "vlrpred".
    // NOTE(review): field 3 ("svlrpred") is selected but never asserted -- confirm
    // whether leaving it out of the comparison is intentional.
    for (Row row : result.collect()) {
        Object label = row.getField(0);
        for (int col = 1; col <= 2; col++) {
            Assert.assertEquals(label, row.getField(col));
        }
    }
}
Also used : Row(org.apache.flink.types.Row) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) Test(org.junit.Test)

Example 5 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

the class SoftmaxTest method pipelineTest.

/**
 * Fits a four-stage pipeline ({@code softmax}, {@code vsoftmax}, {@code svsoftmax},
 * {@code vssoftmax}) on an in-memory batch source, then checks predictions against the
 * {@code label} column twice: once on the batch source and once on a stream source
 * built from the same rows.
 *
 * @throws Exception propagated from the Alink operators
 */
@Test
public void pipelineTest() throws Exception {
    BatchOperator<?> vecdata = new MemSourceBatchOp(Arrays.asList(vecrows), veccolNames);
    StreamOperator<?> svecdata = new MemSourceStreamOp(Arrays.asList(vecrows), veccolNames);

    Pipeline pl = new Pipeline()
        .add(softmax)
        .add(vsoftmax)
        .add(svsoftmax)
        .add(vssoftmax);
    PipelineModel model = pl.fit(vecdata);

    // ---- batch path ----
    BatchOperator<?> result = model.transform(vecdata)
        .select(new String[] { "label", "predLr", "vpredLr", "svpredLr" });
    List<Row> batchRows = result.lazyPrint(100).collect();
    // Field 0 is the label; fields 1 and 2 are "predLr" and "vpredLr".
    // NOTE(review): field 3 ("svpredLr") is selected but never asserted -- confirm
    // whether leaving it out of the comparison is intentional.
    for (Row row : batchRows) {
        Object label = row.getField(0);
        for (int col = 1; col <= 2; col++) {
            Assert.assertEquals(label, row.getField(col));
        }
    }

    // ---- stream path ----
    // The sink is linked before execute(); its collected rows are read afterwards.
    CollectSinkStreamOp sop = model.transform(svecdata)
        .select(new String[] { "label", "predLr", "vpredLr", "svpredLr" })
        .link(new CollectSinkStreamOp());
    StreamOperator.execute();
    List<Row> streamRows = sop.getAndRemoveValues();
    // Same comparison as the batch path: only fields 1 and 2 are checked.
    for (Row row : streamRows) {
        Object label = row.getField(0);
        for (int col = 1; col <= 2; col++) {
            Assert.assertEquals(label, row.getField(col));
        }
    }
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) CollectSinkStreamOp(com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp) Row(org.apache.flink.types.Row) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) Test(org.junit.Test)

Aggregations

Pipeline (com.alibaba.alink.pipeline.Pipeline)63 Test (org.junit.Test)38 PipelineModel (com.alibaba.alink.pipeline.PipelineModel)34 LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression)20 Row (org.apache.flink.types.Row)18 BatchOperator (com.alibaba.alink.operator.batch.BatchOperator)16 MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp)16 VectorAssembler (com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler)11 AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp)10 CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp)9 EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp)8 MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp)7 File (java.io.File)5 ArrayList (java.util.ArrayList)5 EvalMultiClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp)4 StandardScaler (com.alibaba.alink.pipeline.dataproc.StandardScaler)4 Stopwatch (com.alibaba.alink.common.utils.Stopwatch)3 CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp)3 KMeans (com.alibaba.alink.pipeline.clustering.KMeans)3 VectorToTensor (com.alibaba.alink.pipeline.dataproc.VectorToTensor)3