Use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.
From the class PipelineSaveAndLoadTest, method test2.
/**
 * Trains a quantile discretizer on the iris CSV, stores its model data in an Ak file, and then
 * fits and saves a three-stage pipeline whose last stage is loaded from that stored model.
 *
 * @throws Exception if either batch job or the pipeline save fails
 */
@Test
public void test2() throws Exception {
    final String discretizerModelPath = "/tmp/model2.csv";
    final String pipelineModelPath = "/tmp/model23424.csv";

    CsvSourceBatchOp irisSource = new CsvSourceBatchOp()
        .setSchemaStr("sepal_length double, sepal_width double, petal_length double, petal_width double, category string")
        .setFilePath("https://alink-test-data.oss-cn-hangzhou.aliyuncs.com/iris.csv");

    // Step 1: train a discretizer on petal_length and persist its model data.
    QuantileDiscretizerTrainBatchOp discretizerTrainer = new QuantileDiscretizerTrainBatchOp()
        .setNumBuckets(2)
        .setSelectedCols("petal_length")
        .linkFrom(irisSource);
    discretizerTrainer.link(
        new AkSinkBatchOp()
            .setFilePath(discretizerModelPath)
            .setOverwriteSink(true));
    BatchOperator.execute();

    // Step 2: assemble a pipeline from two untrained stages plus the model saved in step 1.
    QuantileDiscretizer sepalLengthDiscretizer = new QuantileDiscretizer()
        .setNumBuckets(2)
        .setSelectedCols("sepal_length");
    Binarizer petalWidthBinarizer = new Binarizer()
        .setSelectedCol("petal_width")
        .setThreshold(1.);
    AkSourceBatchOp savedModelData = new AkSourceBatchOp().setFilePath(discretizerModelPath);
    QuantileDiscretizerModel petalLengthModel = new QuantileDiscretizerModel()
        .setSelectedCols("petal_length")
        .setModelData(savedModelData);

    // Step 3: fit the pipeline and write the pipeline model to file.
    Pipeline pipeline = new Pipeline(sepalLengthDiscretizer, petalWidthBinarizer, petalLengthModel);
    PipelineModel fittedPipeline = pipeline.fit(irisSource);
    // NOTE(review): the boolean argument is presumably the overwrite flag -- confirm against PipelineModel.save.
    fittedPipeline.save(pipelineModelPath, true);
    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.
From the class Chap23, method c_2.
/**
 * Prepares the train/test Ak files when the train file is missing, then fits and evaluates two
 * text-classification pipelines that differ only in the vectorizer stage
 * ({@code DocCountVectorizer} vs. {@code DocHashCountVectorizer}).
 *
 * <p>Each pipeline is: regex tokenizer -> vectorizer -> logistic regression. It is fitted on the
 * train set, applied to the test set and scored with a binary-classification evaluation that
 * treats {@code "pos"} as the positive label.
 *
 * @throws Exception if reading the raw files or running a batch job fails
 */
static void c_2() throws Exception {
    if (!new File(DATA_DIR + TRAIN_FILE).exists()) {
        ArrayList<Row> trainRows = loadLabeledRows("train");
        ArrayList<Row> testRows = loadLabeledRows("test");

        new MemSourceBatchOp(trainRows, COL_NAMES)
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + TRAIN_FILE));
        new MemSourceBatchOp(testRows, COL_NAMES)
            .link(new AkSinkBatchOp().setFilePath(DATA_DIR + TEST_FILE));
        BatchOperator.execute();
    }

    AkSourceBatchOp train_set = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp test_set = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);
    train_set.lazyPrint(2);

    // Pipeline 1: vocabulary-based word counts.
    new Pipeline()
        .add(new RegexTokenizer()
            .setPattern("\\W+")
            .setSelectedCol(TXT_COL_NAME))
        .add(new DocCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol(VECTOR_COL_NAME)
            .enableLazyPrintTransformData(1))
        .add(new LogisticRegression()
            .setMaxIter(30)
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME))
        .fit(train_set)
        .transform(test_set)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("pos")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("DocCountVectorizer"));
    BatchOperator.execute();

    // Pipeline 2: identical except that the vectorizer is the hashing variant.
    new Pipeline()
        .add(new RegexTokenizer()
            .setPattern("\\W+")
            .setSelectedCol(TXT_COL_NAME))
        .add(new DocHashCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol(VECTOR_COL_NAME)
            .enableLazyPrintTransformData(1))
        .add(new LogisticRegression()
            .setMaxIter(30)
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME))
        .fit(train_set)
        .transform(test_set)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("pos")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("DocHashCountVectorizer"));
    BatchOperator.execute();
}

/**
 * Reads every file under {@code ORIGIN_DATA_DIR/<split>/pos} and {@code .../neg} into
 * (label, content) rows, "pos" files first.
 *
 * @param split name of the sub-directory to read, e.g. {@code "train"} or {@code "test"}
 * @return one row per file, holding the label and the file content
 * @throws IllegalStateException if a label folder is missing or cannot be listed
 * @throws Exception if reading a file fails
 */
private static ArrayList<Row> loadLabeledRows(String split) throws Exception {
    ArrayList<Row> rows = new ArrayList<>();
    for (String label : new String[] {"pos", "neg"}) {
        File subfolder = new File(ORIGIN_DATA_DIR + split + File.separator + label);
        // listFiles() returns null (not an empty array) when the path is not a readable
        // directory; fail with the offending path instead of a bare NullPointerException.
        File[] files = subfolder.listFiles();
        if (files == null) {
            throw new IllegalStateException(
                "Cannot list files in directory: " + subfolder.getAbsolutePath());
        }
        for (File f : files) {
            rows.add(Row.of(label, readFileContent(f)));
        }
    }
    return rows;
}
Use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.
From the class Chap24, method c_7.
/**
 * Trains and stores a UserCF model if its file is missing, then prints users-per-item
 * recommendations for item 50 twice: once with default settings and once with
 * {@code setExcludeKnown(true)}. In between, it prints the source ratings matching a fixed
 * user-id list for item 50.
 *
 * @throws Exception if a batch job fails
 */
static void c_7() throws Exception {
    final String modelPath = DATA_DIR + USERCF_MODEL_FILE;

    // Train only when no model file is present yet.
    if (!new File(modelPath).exists()) {
        getSourceRatings()
            .link(new UserCfTrainBatchOp()
                .setUserCol(USER_COL)
                .setItemCol(ITEM_COL)
                .setRateCol(RATING_COL))
            .link(new AkSinkBatchOp().setFilePath(modelPath));
        BatchOperator.execute();
    }

    MemSourceBatchOp singleItem = new MemSourceBatchOp(new Long[] { 50L }, ITEM_COL);

    // Recommendation with default settings.
    UserCfUsersPerItemRecommender defaultRecommender = new UserCfUsersPerItemRecommender()
        .setItemCol(ITEM_COL)
        .setRecommCol(RECOMM_COL)
        .setModelData(new AkSourceBatchOp().setFilePath(modelPath));
    defaultRecommender.transform(singleItem).print();

    // Source ratings for the listed users on item 50.
    getSourceRatings()
        .filter("user_id IN (276,429,222,864,194,650,896,303,749,301) AND item_id=50")
        .print();

    // Same recommendation, this time with excludeKnown enabled.
    UserCfUsersPerItemRecommender excludingRecommender = new UserCfUsersPerItemRecommender()
        .setItemCol(ITEM_COL)
        .setRecommCol(RECOMM_COL)
        .setExcludeKnown(true)
        .setModelData(new AkSourceBatchOp().setFilePath(modelPath));
    excludingRecommender.transform(singleItem).print();
}
Use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.
From the class LocalFileDataBridgeGenerator, method generate.
/**
 * Writes the output of {@code batchOperator} to a temporary {@code .ak} file and returns a
 * {@link LocalFileDataBridge} that points at that file.
 *
 * <p>The temp file is scheduled for deletion when the JVM exits.
 *
 * @param batchOperator operator whose output is sunk to the temp file
 * @param params not read by this implementation
 * @return a data bridge backed by the absolute path of the temp file
 * @throws RuntimeException if the temp file cannot be created or the batch job fails; the
 *     underlying exception is attached as the cause
 */
@Override
public DataBridge generate(BatchOperator<?> batchOperator, Params params) {
    File file;
    try {
        file = File.createTempFile("alink-data-bridge-", ".ak");
    } catch (IOException e) {
        // Keep the IOException as the cause so the real reason (permissions, disk full, ...)
        // is not lost.
        throw new RuntimeException("Cannot create temp file.", e);
    }
    // Standard-library equivalent of a per-file shutdown hook, without registering a new
    // Thread on every call.
    file.deleteOnExit();

    new AkSinkBatchOp()
        .setFilePath(file.getAbsolutePath())
        .setOverwriteSink(true)
        .linkFrom(batchOperator);
    try {
        BatchOperator.execute();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return new LocalFileDataBridge(file.getAbsolutePath());
}
Use of com.alibaba.alink.operator.batch.sink.AkSinkBatchOp in project Alink by alibaba.
From the class LocalPredictorTest, method testGeneModelStream.
/**
 * Fits a two-stage pipeline (one-vs-rest logistic regression, then a vector assembler) on the
 * iris batch data and writes the resulting pipeline model to {@code /tmp/rankModel.ak}.
 *
 * @throws Exception if fitting or the batch job fails
 */
@Test
public void testGeneModelStream() throws Exception {
    // Wildcard instead of the raw type, so generic type checking is kept.
    BatchOperator<?> data = Iris.getBatchData();

    LogisticRegression lr = new LogisticRegression()
        .setFeatureCols(Iris.getFeatureColNames())
        .setLabelCol(Iris.getLabelColName())
        .setPredictionCol("pred_label")
        .setPredictionDetailCol("pred_detail")
        .setModelStreamFilePath("/tmp/rankModel")
        .setMaxIter(100);
    OneVsRest oneVsRest = new OneVsRest()
        .setClassifier(lr)
        .setNumClass(3)
        .setPredictionCol("pred")
        .setPredictionDetailCol("detail");
    VectorAssembler va = new VectorAssembler()
        .setSelectedCols("sepal_length", "sepal_width")
        .setOutputCol("assem");

    Pipeline pipeline = new Pipeline().add(oneVsRest).add(va);
    PipelineModel model = pipeline.fit(data);

    // Sink the fitted model, overwriting any file left by a previous run.
    model.save().link(new AkSinkBatchOp().setFilePath("/tmp/rankModel.ak").setOverwriteSink(true));
    BatchOperator.execute();
}
Aggregations