Use of com.alibaba.alink.pipeline.Pipeline in the Alink project by Alibaba.
Class Chap07, method c_5.
// Demonstrates missing-value imputation with two chained Imputer stages:
// a constant fill for the string column and mean fill for the numeric columns.
static void c_5() throws Exception {
    Row[] rows = new Row[] {
        Row.of("a", 10.0, 100),
        Row.of("b", -2.5, 9),
        Row.of("c", 100.2, 1),
        Row.of("d", -99.9, 100),
        Row.of(null, null, null)
    };

    MemSourceBatchOp source = new MemSourceBatchOp(rows, new String[] {"col1", "col2", "col3"});
    source.lazyPrint(-1, "< origin data >");

    Pipeline pipeline = new Pipeline()
        // Fill missing values of the string column with the constant "e".
        .add(new Imputer()
            .setSelectedCols("col1")
            .setStrategy(Strategy.VALUE)
            .setFillValue("e"))
        // Fill missing values of the numeric columns with their column means.
        .add(new Imputer()
            .setSelectedCols("col2", "col3")
            .setStrategy(Strategy.MEAN));

    pipeline.fit(source).transform(source).print();

    // NOTE(review): integer division — prints 52 (sum of col3 = 210 over 4
    // non-null rows), presumably shown to compare with the imputed value of
    // col3; confirm this truncation is intended.
    System.out.println(210 / 4);
}
Use of com.alibaba.alink.pipeline.Pipeline in the Alink project by Alibaba.
Class Chap14, method c_3.
// Builds a feature-engineering pipeline for the Avazu CTR sample data and
// trains/saves its model once, skipping the work if a saved model already exists.
static void c_3() throws Exception {
    CsvSourceBatchOp trainBatchData = new CsvSourceBatchOp()
        .setFilePath("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/avazu-small.csv")
        .setSchemaStr(SCHEMA_STRING);

    // Feature engineering: standardize numeric columns, then hash all
    // (categorical + numeric) columns into one fixed-width feature vector.
    Pipeline featurePipeline = new Pipeline()
        .add(new StandardScaler()
            .setSelectedCols(NUMERICAL_COL_NAMES))
        .add(new FeatureHasher()
            .setSelectedCols(ArrayUtils.addAll(CATEGORY_COL_NAMES, NUMERICAL_COL_NAMES))
            .setCategoricalCols(CATEGORY_COL_NAMES)
            .setOutputCol(VEC_COL_NAME)
            .setNumFeatures(NUM_HASH_FEATURES));

    // Fit and persist the feature pipeline model only when no saved copy exists.
    if (!new File(DATA_DIR + FEATURE_MODEL_FILE).exists()) {
        featurePipeline.fit(trainBatchData).save(DATA_DIR + FEATURE_MODEL_FILE);
        BatchOperator.execute();
    }
}
Use of com.alibaba.alink.pipeline.Pipeline in the Alink project by Alibaba.
Class PipelineCandidatesGrid, method get.
@Override
public Tuple2<Pipeline, List<Tuple3<Integer, ParamInfo, Object>>> get(int index, List<Double> experienceScores) throws CloneNotSupportedException {
    // Decode the flat candidate index into one concrete parameter value per
    // grid axis (mixed-radix decomposition, most-significant axis first).
    // `experienceScores` is unused here; the grid enumerates exhaustively.
    ArrayList<Tuple3<Integer, ParamInfo, Object>> chosenParams = new ArrayList<>();
    int remainder = index;
    for (int axis = this.dim - 1; axis >= 0; axis--) {
        int pick = remainder / this.counts[axis];
        remainder = remainder % this.counts[axis];
        Tuple3<Integer, ParamInfo, Object[]> candidates = this.items.get(axis);
        chosenParams.add(new Tuple3<>(candidates.f0, candidates.f1, candidates.f2[pick]));
    }
    // Clone so the template pipeline keeps its original parameters.
    Pipeline tunedPipeline = this.pipeline.clone();
    updatePipelineParams(tunedPipeline, chosenParams);
    return Tuple2.of(tunedPipeline, chosenParams);
}
Use of com.alibaba.alink.pipeline.Pipeline in the Alink project by Alibaba.
Class LogisticRegTest, method pipelineTestBatch.
@Test
public void pipelineTestBatch() {
    String[] xVars = new String[] {"f0", "f1", "f2", "f3"};
    String yVar = "labels";
    String vectorName = "vec";
    String svectorName = "svec";

    // Three logistic-regression stages trained on the same data via three
    // input representations: raw feature columns, dense vector, sparse vector.
    LogisticRegression lr = new LogisticRegression().setLabelCol(yVar).setFeatureCols(xVars).setPredictionCol("lrpred").enableLazyPrintModelInfo().enableLazyPrintTrainInfo();
    LogisticRegression vectorLr = new LogisticRegression().setLabelCol(yVar).setVectorCol(vectorName).setPredictionCol("vlrpred").enableLazyPrintModelInfo().enableLazyPrintTrainInfo();
    LogisticRegression sparseVectorLr = new LogisticRegression().setLabelCol(yVar).setVectorCol(svectorName).setPredictionCol("svlrpred").enableLazyPrintModelInfo().enableLazyPrintTrainInfo();

    Pipeline plLr = new Pipeline().add(lr).add(vectorLr).add(sparseVectorLr);
    BatchOperator<?> trainData = getData();
    PipelineModel model = plLr.fit(trainData);
    BatchOperator<?> result = model.transform(trainData).select(new String[] {"labels", "lrpred", "vlrpred", "svlrpred"});

    // Every prediction column must reproduce the label.
    // Fix: iterate over ALL selected columns — the original loop stopped at
    // index 2 and never verified "svlrpred" (field 3).
    List<Row> data = result.collect();
    for (Row row : data) {
        for (int i = 1; i < row.getArity(); ++i) {
            Assert.assertEquals(row.getField(0), row.getField(i));
        }
    }
}
Use of com.alibaba.alink.pipeline.Pipeline in the Alink project by Alibaba.
Class SoftmaxTest, method pipelineTest.
@Test
public void pipelineTest() throws Exception {
    BatchOperator<?> vecdata = new MemSourceBatchOp(Arrays.asList(vecrows), veccolNames);
    StreamOperator<?> svecdata = new MemSourceStreamOp(Arrays.asList(vecrows), veccolNames);

    // Four softmax stages fitted in one pipeline over the batch source.
    Pipeline pl = new Pipeline().add(softmax).add(vsoftmax).add(svsoftmax).add(vssoftmax);
    PipelineModel model = pl.fit(vecdata);

    // Batch scoring: every prediction column must reproduce the label.
    // Fix: iterate over ALL selected columns — the original loop stopped at
    // index 2 and never verified "svpredLr" (field 3).
    BatchOperator<?> result = model.transform(vecdata).select(new String[] {"label", "predLr", "vpredLr", "svpredLr"});
    List<Row> data = result.lazyPrint(100).collect();
    for (Row row : data) {
        for (int i = 1; i < row.getArity(); ++i) {
            Assert.assertEquals(row.getField(0), row.getField(i));
        }
    }

    // Stream scoring with the same model and the same assertion.
    CollectSinkStreamOp sop = model.transform(svecdata).select(new String[] {"label", "predLr", "vpredLr", "svpredLr"}).link(new CollectSinkStreamOp());
    StreamOperator.execute();
    List<Row> rows = sop.getAndRemoveValues();
    for (Row row : rows) {
        for (int i = 1; i < row.getArity(); ++i) {
            Assert.assertEquals(row.getField(0), row.getField(i));
        }
    }
}
Aggregations