Search in sources:

Example 51 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

The class Chap23, method c_4.

/**
 * Builds a text-classification pipeline from unigram, bigram and trigram count vectors feeding a
 * logistic regression, training and saving it only when no saved model file exists yet.
 * The saved model is then loaded, evaluated on the batch test set, applied to the streaming
 * test set, and finally used to score a single review string through two local predictors.
 *
 * @throws Exception propagated from the underlying Alink calls
 */
static void c_4() throws Exception {
    final String modelPath = DATA_DIR + PIPELINE_MODEL;
    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);

    // Train and persist the pipeline only if there is no model file on disk.
    File modelFile = new File(modelPath);
    if (!modelFile.exists()) {
        RegexTokenizer tokenizer = new RegexTokenizer()
            .setPattern("\\W+")
            .setSelectedCol(TXT_COL_NAME);
        DocCountVectorizer unigramCounts = new DocCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol(VECTOR_COL_NAME);
        NGram bigrams = new NGram()
            .setN(2)
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol("v_2");
        DocCountVectorizer bigramCounts = new DocCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setVocabSize(50000)
            .setSelectedCol("v_2")
            .setOutputCol("v_2");
        NGram trigrams = new NGram()
            .setN(3)
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol("v_3");
        DocCountVectorizer trigramCounts = new DocCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setVocabSize(10000)
            .setSelectedCol("v_3")
            .setOutputCol("v_3");
        // Merge the three count vectors back into the main vector column.
        VectorAssembler assembler = new VectorAssembler()
            .setSelectedCols(VECTOR_COL_NAME, "v_2", "v_3")
            .setOutputCol(VECTOR_COL_NAME);
        LogisticRegression classifier = new LogisticRegression()
            .setMaxIter(30)
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME);

        new Pipeline()
            .add(tokenizer)
            .add(unigramCounts)
            .add(bigrams)
            .add(bigramCounts)
            .add(trigrams)
            .add(trigramCounts)
            .add(assembler)
            .add(classifier)
            .fit(trainSet)
            .save(modelPath);
        BatchOperator.execute();
    }

    PipelineModel pipelineModel = PipelineModel.load(modelPath);

    // Batch evaluation on the test set.
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);
    BatchOperator<?> batchPredictions = pipelineModel.transform(testSet);
    batchPredictions.link(
        new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("pos")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("NGram 2 and 3")
    );
    BatchOperator.execute();

    // Stream prediction: sample the output and print prediction, label and text columns.
    AkSourceStreamOp testStream = new AkSourceStreamOp().setFilePath(DATA_DIR + TEST_FILE);
    String printedCols = String.join(", ", PREDICTION_COL_NAME, LABEL_COL_NAME, TXT_COL_NAME);
    StreamOperator<?> streamPredictions = pipelineModel.transform(testStream);
    streamPredictions.sample(0.001).select(printedCols).print();
    StreamOperator.execute();

    String review =
        "Oh dear. good cast, but to write and direct is an art and to write wit and direct wit is a bit of a "
            + "task. Even doing good comedy you have to get the timing and moment right. Im not putting it all down "
            + "there were parts where i laughed loud but that was at very few times. The main focus to me was on the "
            + "fast free flowing dialogue, that made some people in the film annoying. It may sound great while "
            + "reading the script in your head but getting that out and to the camera is a different task. And the "
            + "hand held camera work does give energy to few parts of the film. Overall direction was good but the "
            + "script was not all that to me, but I'm sure you was reading the script in your head it would sound good"
            + ". Sorry.";

    // Local predictor obtained from the in-memory pipeline model.
    LocalPredictor fromModel = pipelineModel.collectLocalPredictor("review string");
    System.out.println(fromModel.getOutputSchema());
    Row modelResult = fromModel.map(Row.of(review));
    System.out.println(modelResult.getField(4));

    // Local predictor constructed directly from the saved model file.
    LocalPredictor fromFile = new LocalPredictor(modelPath, "review string");
    System.out.println(fromFile.getOutputSchema());
    Row fileResult = fromFile.map(Row.of(review));
    System.out.println(fileResult.getField(4));
}
Also used : LocalPredictor(com.alibaba.alink.pipeline.LocalPredictor) VectorAssembler(com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler) NGram(com.alibaba.alink.pipeline.nlp.NGram) DocCountVectorizer(com.alibaba.alink.pipeline.nlp.DocCountVectorizer) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel) EvalBinaryClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp) AkSourceBatchOp(com.alibaba.alink.operator.batch.source.AkSourceBatchOp) RegexTokenizer(com.alibaba.alink.pipeline.nlp.RegexTokenizer) AkSourceStreamOp(com.alibaba.alink.operator.stream.source.AkSourceStreamOp) Row(org.apache.flink.types.Row) LogisticRegression(com.alibaba.alink.pipeline.classification.LogisticRegression) File(java.io.File)

Example 52 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

The class Chap25, method dnnReg.

/**
 * Fits a pipeline (standard scaling, vector assembly, vector-to-tensor conversion and a Keras
 * sequential regressor with five {@code Dense(64, activation='relu')} layers trained for
 * 20 epochs) on {@code train_set}, applies it to {@code test_set}, and lazily prints the
 * prediction statistics and regression metrics for the "quality" label.
 *
 * @param train_set data used to fit the pipeline
 * @param test_set  data the fitted pipeline is applied to and evaluated on
 * @throws Exception propagated from the underlying Alink calls
 */
public static void dnnReg(BatchOperator<?> train_set, BatchOperator<?> test_set) throws Exception {
    BatchOperator.setParallelism(1);

    StandardScaler scaler = new StandardScaler()
        .setSelectedCols(Chap16.FEATURE_COL_NAMES);
    VectorAssembler assembler = new VectorAssembler()
        .setSelectedCols(Chap16.FEATURE_COL_NAMES)
        .setOutputCol("vec");
    VectorToTensor toTensor = new VectorToTensor()
        .setSelectedCol("vec")
        .setOutputCol("tensor")
        .setReservedCols("quality");
    KerasSequentialRegressor regressor = new KerasSequentialRegressor()
        .setTensorCol("tensor")
        .setLabelCol("quality")
        .setPredictionCol("pred")
        .setLayers(
            "Dense(64, activation='relu')",
            "Dense(64, activation='relu')",
            "Dense(64, activation='relu')",
            "Dense(64, activation='relu')",
            "Dense(64, activation='relu')"
        )
        .setNumEpochs(20);

    Pipeline pipeline = new Pipeline()
        .add(scaler)
        .add(assembler)
        .add(toTensor)
        .add(regressor);

    // The statistics print is registered before the evaluator (and its metrics print) is created.
    BatchOperator<?> predictions = pipeline.fit(train_set).transform(test_set).lazyPrintStatistics();
    predictions.link(
        new EvalRegressionBatchOp()
            .setLabelCol("quality")
            .setPredictionCol("pred")
            .lazyPrintMetrics()
    );

    BatchOperator.execute();
}
Also used : StandardScaler(com.alibaba.alink.pipeline.dataproc.StandardScaler) VectorAssembler(com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler) EvalRegressionBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalRegressionBatchOp) KerasSequentialRegressor(com.alibaba.alink.pipeline.regression.KerasSequentialRegressor) VectorToTensor(com.alibaba.alink.pipeline.dataproc.VectorToTensor) Pipeline(com.alibaba.alink.pipeline.Pipeline)

Example 53 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

The class Chap25, method cnn.

/**
 * Fits a pipeline that scales the "vec" column by 1/255, converts it to a 28x28 float tensor,
 * and trains a Keras sequential classifier with two Conv2D/MaxPooling2D stages, then applies
 * the fitted pipeline to {@code test_set} and lazily prints multi-class metrics for "label".
 *
 * @param train_set data used to fit the pipeline
 * @param test_set  data the fitted pipeline is applied to and evaluated on
 * @throws Exception propagated from the underlying Alink calls
 */
public static void cnn(BatchOperator<?> train_set, BatchOperator<?> test_set) throws Exception {
    BatchOperator.setParallelism(1);

    VectorFunction scaleTo01 = new VectorFunction()
        .setSelectedCol("vec")
        .setFuncName("Scale")
        .setWithVariable(1.0 / 255.0);
    VectorToTensor toTensor = new VectorToTensor()
        .setTensorDataType("float")
        .setTensorShape(28, 28)
        .setSelectedCol("vec")
        .setOutputCol("tensor")
        .setReservedCols("label");
    KerasSequentialClassifier classifier = new KerasSequentialClassifier()
        .setTensorCol("tensor")
        .setLabelCol("label")
        .setPredictionCol("pred")
        .setLayers(
            "Reshape((28, 28, 1))",
            "Conv2D(32, kernel_size=(3, 3), activation='relu')",
            "MaxPooling2D(pool_size=(2, 2))",
            "Conv2D(64, kernel_size=(3, 3), activation='relu')",
            "MaxPooling2D(pool_size=(2, 2))",
            "Flatten()",
            "Dropout(0.5)"
        )
        .setNumEpochs(20)
        .setValidationSplit(0.1)
        .setSaveBestOnly(true)
        .setBestMetric("sparse_categorical_accuracy");

    Pipeline pipeline = new Pipeline()
        .add(scaleTo01)
        .add(toTensor)
        .add(classifier);

    BatchOperator<?> predictions = pipeline.fit(train_set).transform(test_set);
    predictions.link(
        new EvalMultiClassBatchOp()
            .setLabelCol("label")
            .setPredictionCol("pred")
            .lazyPrintMetrics()
    );

    BatchOperator.execute();
}
Also used : KerasSequentialClassifier(com.alibaba.alink.pipeline.classification.KerasSequentialClassifier) EvalMultiClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp) VectorFunction(com.alibaba.alink.pipeline.dataproc.vector.VectorFunction) VectorToTensor(com.alibaba.alink.pipeline.dataproc.VectorToTensor) Pipeline(com.alibaba.alink.pipeline.Pipeline)

Example 54 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

The class Chap25, method dnn.

/**
 * Fits a pipeline that scales the "vec" column by 1/255, converts it to a float tensor, and
 * trains a Keras sequential classifier with two dense layers (256 and 128 units), then applies
 * the fitted pipeline to {@code test_set} and lazily prints multi-class metrics for "label".
 *
 * @param train_set data used to fit the pipeline
 * @param test_set  data the fitted pipeline is applied to and evaluated on
 * @throws Exception propagated from the underlying Alink calls
 */
public static void dnn(BatchOperator<?> train_set, BatchOperator<?> test_set) throws Exception {
    BatchOperator.setParallelism(1);

    VectorFunction scaleTo01 = new VectorFunction()
        .setSelectedCol("vec")
        .setFuncName("Scale")
        .setWithVariable(1.0 / 255.0);
    VectorToTensor toTensor = new VectorToTensor()
        .setTensorDataType("float")
        .setSelectedCol("vec")
        .setOutputCol("tensor")
        .setReservedCols("label");
    KerasSequentialClassifier classifier = new KerasSequentialClassifier()
        .setTensorCol("tensor")
        .setLabelCol("label")
        .setPredictionCol("pred")
        .setLayers(
            "Dense(256, activation='relu')",
            "Dense(128, activation='relu')"
        )
        .setNumEpochs(50)
        .setBatchSize(512)
        .setValidationSplit(0.1)
        .setSaveBestOnly(true)
        .setBestMetric("sparse_categorical_accuracy");

    Pipeline pipeline = new Pipeline()
        .add(scaleTo01)
        .add(toTensor)
        .add(classifier);

    BatchOperator<?> predictions = pipeline.fit(train_set).transform(test_set);
    predictions.link(
        new EvalMultiClassBatchOp()
            .setLabelCol("label")
            .setPredictionCol("pred")
            .lazyPrintMetrics()
    );

    BatchOperator.execute();
}
Also used : KerasSequentialClassifier(com.alibaba.alink.pipeline.classification.KerasSequentialClassifier) EvalMultiClassBatchOp(com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp) VectorFunction(com.alibaba.alink.pipeline.dataproc.vector.VectorFunction) VectorToTensor(com.alibaba.alink.pipeline.dataproc.VectorToTensor) Pipeline(com.alibaba.alink.pipeline.Pipeline)

Example 55 with Pipeline

use of com.alibaba.alink.pipeline.Pipeline in project Alink by alibaba.

The class Chap01, method c_5_4.

/**
 * Fits a linear regression of "gmv" on "x" and "x*x" from nine in-memory yearly rows, saves the
 * fitted pipeline model (deleting any existing model file first), reloads it, and prints
 * predictions for 2018 and 2019 in both batch and stream mode.
 *
 * @throws Exception propagated from the underlying Alink calls
 */
static void c_5_4() throws Exception {
    final String modelPath = DATA_DIR + "gmv_pipeline.model";

    Row[] history = new Row[] {
        Row.of(2009, 0.5),
        Row.of(2010, 9.36),
        Row.of(2011, 52.0),
        Row.of(2012, 191.0),
        Row.of(2013, 350.0),
        Row.of(2014, 571.0),
        Row.of(2015, 912.0),
        Row.of(2016, 1207.0),
        Row.of(2017, 1682.0)
    };
    MemSourceBatchOp trainSet = new MemSourceBatchOp(history, new String[] { "x", "gmv" });

    // Add the squared term as an extra column, then regress on both x and x2.
    Select addSquare = new Select().setClause("*, x*x AS x2");
    LinearRegression regression = new LinearRegression()
        .setFeatureCols("x", "x2")
        .setLabelCol("gmv")
        .setPredictionCol("pred");
    Pipeline pipeline = new Pipeline().add(addSquare).add(regression);

    // Remove any existing model file before saving the new one.
    File existingModel = new File(modelPath);
    if (existingModel.exists()) {
        existingModel.delete();
    }

    pipeline.fit(trainSet).save(modelPath);
    BatchOperator.execute();

    PipelineModel pipelineModel = PipelineModel.load(modelPath);

    // Batch prediction.
    BatchOperator<?> predBatch = new MemSourceBatchOp(new Integer[] { 2018, 2019 }, "x");
    pipelineModel.transform(predBatch).print();

    // Stream prediction on the same two years.
    MemSourceStreamOp predStream = new MemSourceStreamOp(new Integer[] { 2018, 2019 }, "x");
    pipelineModel.transform(predStream).print();
    StreamOperator.execute();
}
Also used : MemSourceBatchOp(com.alibaba.alink.operator.batch.source.MemSourceBatchOp) MemSourceStreamOp(com.alibaba.alink.operator.stream.source.MemSourceStreamOp) Select(com.alibaba.alink.pipeline.sql.Select) LinearRegression(com.alibaba.alink.pipeline.regression.LinearRegression) File(java.io.File) Pipeline(com.alibaba.alink.pipeline.Pipeline) PipelineModel(com.alibaba.alink.pipeline.PipelineModel)

Aggregations

Pipeline (com.alibaba.alink.pipeline.Pipeline): 63; Test (org.junit.Test): 38; PipelineModel (com.alibaba.alink.pipeline.PipelineModel): 34; LogisticRegression (com.alibaba.alink.pipeline.classification.LogisticRegression): 20; Row (org.apache.flink.types.Row): 18; BatchOperator (com.alibaba.alink.operator.batch.BatchOperator): 16; MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp): 16; VectorAssembler (com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler): 11; AkSourceBatchOp (com.alibaba.alink.operator.batch.source.AkSourceBatchOp): 10; CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp): 9; EvalBinaryClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp): 8; MemSourceStreamOp (com.alibaba.alink.operator.stream.source.MemSourceStreamOp): 7; File (java.io.File): 5; ArrayList (java.util.ArrayList): 5; EvalMultiClassBatchOp (com.alibaba.alink.operator.batch.evaluation.EvalMultiClassBatchOp): 4; StandardScaler (com.alibaba.alink.pipeline.dataproc.StandardScaler): 4; Stopwatch (com.alibaba.alink.common.utils.Stopwatch): 3; CsvSourceBatchOp (com.alibaba.alink.operator.batch.source.CsvSourceBatchOp): 3; KMeans (com.alibaba.alink.pipeline.clustering.KMeans): 3; VectorToTensor (com.alibaba.alink.pipeline.dataproc.VectorToTensor): 3