Use of com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp in project Alink by Alibaba.
Class GbdtBatchOpTest, method linkFromSimple.
/**
 * Trains a two-tree GBDT model on a small in-memory data set, predicts on that same data set,
 * and checks that the AUC reported by the binary-class evaluation equals 1.0 (within 1e-6).
 *
 * @throws Exception if any operator in the chain fails
 */
@Test
public void linkFromSimple() throws Exception {
    Row[] testArray = new Row[] {
        Row.of(1, 2, 0),
        Row.of(1, 2, 0),
        Row.of(0, 3, 1),
        Row.of(0, 2, 0),
        Row.of(1, 3, 1),
        Row.of(4, 3, 1),
        Row.of(4, 4, 1)
    };
    String[] colNames = new String[] { "col0", "col1", "label" };
    MemSourceBatchOp memSourceBatchOp = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

    // The first two columns are features, the third one is the label.
    GbdtTrainBatchOp gbdtTrainBatchOp = new GbdtTrainBatchOp()
        .setFeatureCols(colNames[0], colNames[1])
        .setLabelCol(colNames[2])
        .setMinSamplesPerLeaf(1)
        .setNumTrees(2);
    BatchOperator<?> model = gbdtTrainBatchOp.linkFrom(memSourceBatchOp);

    // Predict on the training data and evaluate using the prediction detail column.
    double auc = new GbdtPredictBatchOp()
        .setPredictionCol("pred_col")
        .setPredictionDetailCol("pred_detail")
        .linkFrom(model, memSourceBatchOp)
        .link(new EvalBinaryClassBatchOp()
            .setLabelCol(colNames[2])
            .setPositiveLabelValueString("1")
            .setPredictionDetailCol("pred_detail"))
        .collectMetrics()
        .getAuc();

    // JUnit expects (expected, actual, delta); keeping that order makes failure messages correct.
    Assert.assertEquals(1.0, auc, 1e-6);
}
Use of com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp in project Alink by Alibaba.
Class FmRecommImplicitTest, method eval.
/**
 * Runs a binary-class evaluation over the given predictions and prints AUC, accuracy and F1
 * to standard output.
 *
 * <p>The input is first projected to the {@code label} column plus a {@code p_detail} string
 * column that the select clause assembles from column {@code p}; that string is then used as
 * the prediction detail for the evaluation.
 *
 * @param pred operator supplying the {@code label} and {@code p} columns used by the select clause
 */
private static void eval(BatchOperator<?> pred) {
    BatchOperator<?> withDetail = pred.select(
        "label, concat('{\"0\":', cast((1-p) as varchar), ',\"1\":', cast(p as varchar), '}') as p_detail");

    BinaryClassMetrics metrics = new EvalBinaryClassBatchOp()
        .setLabelCol("label")
        .setPredictionDetailCol("p_detail")
        .linkFrom(withDetail)
        .collectMetrics();

    String summary = String.format(
        "auc=%f,acc=%f,f1=%f", metrics.getAuc(), metrics.getAccuracy(), metrics.getF1());
    System.out.println(summary);
}
Use of com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp in project Alink by Alibaba.
Class Chap23, method c_2.
/**
 * Prepares the train/test data files when the training file is missing, then trains and
 * evaluates two text-classification pipelines: one using {@code DocCountVectorizer} and one
 * using {@code DocHashCountVectorizer}. Each pipeline tokenizes the text column, vectorizes it
 * by word count, fits a logistic regression and lazily prints binary-class metrics.
 *
 * @throws Exception if reading the raw files or executing the batch jobs fails
 */
static void c_2() throws Exception {
    // Only the training file is checked; both files are (re)written together when it is absent.
    if (!new File(DATA_DIR + TRAIN_FILE).exists()) {
        ArrayList<Row> trainRows = readLabeledRows("train");
        ArrayList<Row> testRows = readLabeledRows("test");
        new MemSourceBatchOp(trainRows, COL_NAMES).link(new AkSinkBatchOp().setFilePath(DATA_DIR + TRAIN_FILE));
        new MemSourceBatchOp(testRows, COL_NAMES).link(new AkSinkBatchOp().setFilePath(DATA_DIR + TEST_FILE));
        BatchOperator.execute();
    }

    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);
    trainSet.lazyPrint(2);

    // Pipeline 1: word counts via DocCountVectorizer.
    new Pipeline()
        .add(new RegexTokenizer()
            .setPattern("\\W+")
            .setSelectedCol(TXT_COL_NAME))
        .add(new DocCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol(VECTOR_COL_NAME)
            .enableLazyPrintTransformData(1))
        .add(new LogisticRegression()
            .setMaxIter(30)
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME))
        .fit(trainSet)
        .transform(testSet)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("pos")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("DocCountVectorizer"));
    BatchOperator.execute();

    // Pipeline 2: identical except that DocHashCountVectorizer is the vectorizer.
    new Pipeline()
        .add(new RegexTokenizer()
            .setPattern("\\W+")
            .setSelectedCol(TXT_COL_NAME))
        .add(new DocHashCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol(VECTOR_COL_NAME)
            .enableLazyPrintTransformData(1))
        .add(new LogisticRegression()
            .setMaxIter(30)
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME))
        .fit(trainSet)
        .transform(testSet)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("pos")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("DocHashCountVectorizer"));
    BatchOperator.execute();
}

/**
 * Reads every file under the {@code pos} and {@code neg} sub-folders of the given split
 * directory and returns one {@code (label, content)} row per file.
 *
 * @param split name of the directory below {@code ORIGIN_DATA_DIR}, e.g. {@code "train"}
 * @return the rows for the "pos" folder followed by the rows for the "neg" folder
 * @throws IllegalStateException if a label folder cannot be listed (missing, not a directory,
 *     or an I/O error occurred)
 * @throws Exception if reading a file's content fails
 */
private static ArrayList<Row> readLabeledRows(String split) throws Exception {
    ArrayList<Row> rows = new ArrayList<>();
    for (String label : new String[] { "pos", "neg" }) {
        File subfolder = new File(ORIGIN_DATA_DIR + split + File.separator + label);
        // listFiles() returns null rather than throwing when the directory cannot be listed.
        File[] files = subfolder.listFiles();
        if (files == null) {
            throw new IllegalStateException(
                "Cannot list files in directory: " + subfolder.getAbsolutePath());
        }
        for (File f : files) {
            rows.add(Row.of(label, readFileContent(f)));
        }
    }
    return rows;
}
Use of com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp in project Alink by Alibaba.
Class Chap23, method c_3.
/**
 * Trains and evaluates two logistic-regression text classifiers that add n-gram features to
 * the plain word-count vector: the first adds 2-grams, the second adds both 2-grams and
 * 3-grams. Metrics for each run are printed lazily and each run is triggered by its own
 * {@code BatchOperator.execute()} call.
 *
 * @throws Exception if executing the batch jobs fails
 */
static void c_3() throws Exception {
    AkSourceBatchOp trainSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TRAIN_FILE);
    AkSourceBatchOp testSet = new AkSourceBatchOp().setFilePath(DATA_DIR + TEST_FILE);

    // Run 1: word counts + 2-gram counts, assembled into a single vector column.
    Pipeline bigramPipeline = new Pipeline()
        .add(new RegexTokenizer()
            .setPattern("\\W+")
            .setSelectedCol(TXT_COL_NAME))
        .add(new DocCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol(VECTOR_COL_NAME))
        .add(new NGram()
            .setN(2)
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol("v_2")
            .enableLazyPrintTransformData(1, "2-gram"))
        .add(new DocCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setSelectedCol("v_2")
            .setOutputCol("v_2"))
        .add(new VectorAssembler()
            .setSelectedCols(VECTOR_COL_NAME, "v_2")
            .setOutputCol(VECTOR_COL_NAME))
        .add(new LogisticRegression()
            .setMaxIter(30)
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME));
    bigramPipeline
        .fit(trainSet)
        .transform(testSet)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("pos")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("NGram 2"));
    BatchOperator.execute();

    // Run 2: word counts + 2-gram counts + 3-gram counts (3-gram vocabulary size set to 10000).
    Pipeline trigramPipeline = new Pipeline()
        .add(new RegexTokenizer()
            .setPattern("\\W+")
            .setSelectedCol(TXT_COL_NAME))
        .add(new DocCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol(VECTOR_COL_NAME))
        .add(new NGram()
            .setN(2)
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol("v_2"))
        .add(new DocCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setSelectedCol("v_2")
            .setOutputCol("v_2"))
        .add(new NGram()
            .setN(3)
            .setSelectedCol(TXT_COL_NAME)
            .setOutputCol("v_3"))
        .add(new DocCountVectorizer()
            .setFeatureType("WORD_COUNT")
            .setVocabSize(10000)
            .setSelectedCol("v_3")
            .setOutputCol("v_3"))
        .add(new VectorAssembler()
            .setSelectedCols(VECTOR_COL_NAME, "v_2", "v_3")
            .setOutputCol(VECTOR_COL_NAME))
        .add(new LogisticRegression()
            .setMaxIter(30)
            .setVectorCol(VECTOR_COL_NAME)
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionCol(PREDICTION_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME));
    trigramPipeline
        .fit(trainSet)
        .transform(testSet)
        .link(new EvalBinaryClassBatchOp()
            .setPositiveLabelValueString("pos")
            .setLabelCol(LABEL_COL_NAME)
            .setPredictionDetailCol(PRED_DETAIL_COL_NAME)
            .lazyPrintMetrics("NGram 2 and 3"));
    BatchOperator.execute();
}
Use of com.alibaba.alink.operator.batch.evaluation.EvalBinaryClassBatchOp in project Alink by Alibaba.
Class FmClassifierTest, method testFmSparse.
/**
 * Trains an FM classifier on five sparse-vector samples, predicts on the same samples,
 * evaluates the predictions, extracts values from the evaluation JSON and checks that the
 * first field of the first result row parses to 0.8 (within 0.01).
 */
@Test
public void testFmSparse() {
    BatchOperator<?> trainData = new MemSourceBatchOp(
        new Object[][] {
            { "1:1.1 3:2.0", 1.0 },
            { "2:2.1 10:3.1", 1.0 },
            { "3:3.1 7:2.2", 1.0 },
            { "1:1.2 5:3.2", 0.0 },
            { "3:1.2 7:4.2", 0.0 }
        },
        new String[] { "vec", "label" });

    FmClassifierTrainBatchOp adagrad = new FmClassifierTrainBatchOp()
        .setVectorCol("vec")
        .setLabelCol("label")
        .setNumEpochs(10)
        .setInitStdev(0.01)
        .setLearnRate(0.01)
        .setEpsilon(0.0001)
        .linkFrom(trainData);

    BatchOperator<?> result = new FmPredictBatchOp()
        .setVectorCol("vec")
        .setPredictionCol("pred")
        .setPredictionDetailCol("details")
        .linkFrom(adagrad, trainData);

    // Pull Accuracy, AUC and ConfusionMatrix out of the evaluation's "Data" JSON column.
    List<Row> eval = new EvalBinaryClassBatchOp()
        .setLabelCol("label")
        .setPredictionDetailCol("details")
        .linkFrom(result)
        .link(new JsonValueBatchOp()
            .setSelectedCol("Data")
            .setReservedCols(new String[] { "Statistics" })
            .setOutputCols(new String[] { "Accuracy", "AUC", "ConfusionMatrix" })
            .setJsonPath("$.Accuracy", "$.AUC", "$.ConfusionMatrix"))
        .collect();

    // NOTE(review): field 0 is presumably the Accuracy value — confirm the output column order.
    double observed = Double.parseDouble(eval.get(0).getField(0).toString());

    // JUnit expects (expected, actual, delta); keeping that order makes failure messages correct.
    Assert.assertEquals(0.8, observed, 0.01);
}
Aggregations