use of com.alibaba.alink.operator.batch.dataproc.vector.VectorAssemblerBatchOp in project Alink by alibaba.
the class Chap07 method c_4_2.
/**
 * Reads the CSV data set, assembles the feature columns into one vector column
 * (keeping the label column), normalizes that vector with {@code p = 1.0} and
 * prints the first five resulting rows.
 *
 * @throws Exception if any of the linked operators fails while running
 */
static void c_4_2() throws Exception {
    // Step 1: raw CSV input.
    CsvSourceBatchOp csvInput = new CsvSourceBatchOp()
        .setFilePath(DATA_DIR + ORIGIN_FILE)
        .setSchemaStr(SCHEMA_STRING);

    // Step 2: pack the feature columns into a single vector column.
    VectorAssemblerBatchOp assembler = new VectorAssemblerBatchOp()
        .setSelectedCols(FEATURE_COL_NAMES)
        .setOutputCol(VECTOR_COL_NAME)
        .setReservedCols(LABEL_COL_NAME);
    BatchOperator<?> source = csvInput.link(assembler);

    // Step 3: normalize the vector column and show a small sample.
    VectorNormalizeBatchOp normalizer = new VectorNormalizeBatchOp()
        .setSelectedCol(VECTOR_COL_NAME)
        .setP(1.0);
    source.link(normalizer).firstN(5).print();
}
use of com.alibaba.alink.operator.batch.dataproc.vector.VectorAssemblerBatchOp in project Alink by alibaba.
the class Chap07 method c_4_1.
/**
 * Compares three vector scalers on the assembled feature vector.
 *
 * <p>A lazy vector summary is registered for the original data and for the output of
 * the standard, min-max and max-abs scalers, each fitted on and applied to the same
 * source. A single {@code BatchOperator.execute()} call at the end runs the job.
 *
 * @throws Exception if building or executing the job fails
 */
static void c_4_1() throws Exception {
    VectorAssemblerBatchOp assembler = new VectorAssemblerBatchOp()
        .setSelectedCols(FEATURE_COL_NAMES)
        .setOutputCol(VECTOR_COL_NAME)
        .setReservedCols(LABEL_COL_NAME);
    BatchOperator<?> source = new CsvSourceBatchOp()
        .setFilePath(DATA_DIR + ORIGIN_FILE)
        .setSchemaStr(SCHEMA_STRING)
        .link(assembler);

    // Baseline: summary of the unscaled vectors.
    source.link(
        new VectorSummarizerBatchOp()
            .setSelectedCol(VECTOR_COL_NAME)
            .lazyPrintVectorSummary("< Origin data >")
    );

    // Standard scaling.
    BatchOperator<?> standardScaled = new VectorStandardScaler()
        .setSelectedCol(VECTOR_COL_NAME)
        .fit(source)
        .transform(source);
    standardScaled.link(
        new VectorSummarizerBatchOp()
            .setSelectedCol(VECTOR_COL_NAME)
            .lazyPrintVectorSummary("< after Vector Standard Scale >")
    );

    // Min-max scaling.
    BatchOperator<?> minMaxScaled = new VectorMinMaxScaler()
        .setSelectedCol(VECTOR_COL_NAME)
        .fit(source)
        .transform(source);
    minMaxScaled.link(
        new VectorSummarizerBatchOp()
            .setSelectedCol(VECTOR_COL_NAME)
            .lazyPrintVectorSummary("< after Vector MinMax Scale >")
    );

    // Max-abs scaling.
    BatchOperator<?> maxAbsScaled = new VectorMaxAbsScaler()
        .setSelectedCol(VECTOR_COL_NAME)
        .fit(source)
        .transform(source);
    maxAbsScaled.link(
        new VectorSummarizerBatchOp()
            .setSelectedCol(VECTOR_COL_NAME)
            .lazyPrintVectorSummary("< after Vector MaxAbs Scale >")
    );

    // Nothing above has run yet; this triggers the registered lazy prints.
    BatchOperator.execute();
}
use of com.alibaba.alink.operator.batch.dataproc.vector.VectorAssemblerBatchOp in project Alink by alibaba.
the class GbdtBatchOpTest method linkFromVectorEps.
/**
 * Trains a single-tree GBDT model on an assembled vector column with the
 * epsilon-approximate quantile option switched on, then predicts on the same data
 * and checks that one prediction row comes back per input row.
 */
@Test
public void linkFromVectorEps() throws Exception {
    String[] colNames = { "col0", "col1", "label" };
    String labelCol = colNames[2];
    String vectorCol = "vector";

    Row[] testArray = new Row[] {
        Row.of(1, 2, 0),
        Row.of(1, 2, 0),
        Row.of(0, 3, 1),
        Row.of(0, 2, 0),
        Row.of(1, 3, 1),
        Row.of(4, 3, 1),
        Row.of(4, 4, 1),
        Row.of(5, 3, 0),
        Row.of(5, 4, 0),
        Row.of(5, 2, 1)
    };
    MemSourceBatchOp memSourceBatchOp = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

    VectorAssemblerBatchOp vectorAssemblerBatchOp = new VectorAssemblerBatchOp()
        .setSelectedCols(colNames[0], colNames[1])
        .setReservedCols(labelCol)
        .setOutputCol(vectorCol);

    Params trainParams = new Params().set(BaseGbdtTrainBatchOp.USE_EPSILON_APPRO_QUANTILE, true);
    GbdtTrainBatchOp gbdtTrainBatchOp = new GbdtTrainBatchOp(trainParams)
        .setVectorCol(vectorCol)
        .setLabelCol(labelCol)
        .setMaxLeaves(3)
        .setMinSamplesPerLeaf(1)
        .setLearningRate(1.0)
        .setNumTrees(1)
        .setCriteria(GbdtTrainParams.CriteriaType.XGBOOST);

    // The assembler is linked to the source once for training ...
    BatchOperator<?> model = gbdtTrainBatchOp.linkFrom(vectorAssemblerBatchOp.linkFrom(memSourceBatchOp));

    GbdtPredictBatchOp predictBatchOp = new GbdtPredictBatchOp()
        .setVectorCol(vectorCol)
        .setPredictionCol("pred")
        .setPredictionDetailCol("detail");

    // ... and linked again to feed the prediction step.
    int predictedRows = predictBatchOp
        .linkFrom(model, vectorAssemblerBatchOp.linkFrom(memSourceBatchOp))
        .collect()
        .size();
    Assert.assertEquals(testArray.length, predictedRows);
}
use of com.alibaba.alink.operator.batch.dataproc.vector.VectorAssemblerBatchOp in project Alink by alibaba.
the class MultiHotTest method testMultiHot.
/**
 * Trains a multi-hot encoder on the {@code svec} and {@code vec} columns, encodes the
 * prediction data as vectors, assembles the two encoded columns into one vector and
 * checks the first field of the first collected row against the expected vector.
 */
@Test
public void testMultiHot() {
    String[] selectedCols = new String[] { "svec", "vec" };

    BatchOperator<?> vecdata = new MemSourceBatchOp(Arrays.asList(array), veccolNames);
    BatchOperator<?> predData = new MemSourceBatchOp(Arrays.asList(predArray), veccolNames);

    MultiHotTrainBatchOp mh = new MultiHotTrainBatchOp()
        .setSelectedCols(selectedCols)
        .setDiscreteThresholdsArray(3, 6)
        .setDelimiter(" ")
        .linkFrom(vecdata);
    mh.lazyPrintModelInfo();

    // setReservedCols() is called with no columns; presumably this leaves the assembled
    // "kv" vector as the only output column, which is why field 0 is inspected below.
    Row result = new MultiHotPredictBatchOp()
        .setSelectedCols(selectedCols)
        .setHandleInvalid(HandleInvalid.KEEP)
        .setEncode(Encode.VECTOR)
        .setOutputCols("kv1", "kv2")
        .linkFrom(mh, predData)
        .link(new VectorAssemblerBatchOp().setSelectedCols("kv1", "kv2").setOutputCol("kv").setReservedCols())
        .collect()
        .get(0);

    // JUnit's contract is assertEquals(expected, actual); passing them in that order
    // keeps the failure message ("expected:<..> but was:<..>") truthful.
    Assert.assertEquals(VectorUtil.getVector("$14$1:1.0 2:1.0 4:1.0 5:1.0 11:1.0"), result.getField(0));
}
use of com.alibaba.alink.operator.batch.dataproc.vector.VectorAssemblerBatchOp in project Alink by alibaba.
the class KnnTrainBatchOp method linkFrom.
/**
 * Trains the KNN model from the first input operator.
 *
 * <p>Exactly one of {@code featureCols} and {@code vectorCol} must be configured. When
 * feature columns are given, they are first assembled into the {@code VECTOR_COL}
 * column (only the label column is reserved) and this operator's {@code vectorCol}
 * parameter is set to {@code VECTOR_COL} as a side effect. Training itself is delegated
 * to {@link VectorNearestNeighborTrainBatchOp}, using the label column as the id column;
 * its data set and schema become this operator's output.
 *
 * @param inputs input operators; only the first one is used
 * @return this operator, with its output set to the trained model
 * @throws IllegalArgumentException if both or neither of featureCols / vectorCol are set
 */
@Override
public KnnTrainBatchOp linkFrom(BatchOperator<?>... inputs) {
    // XOR of the two null checks: true only when exactly one of them is configured.
    Preconditions.checkArgument(getFeatureCols() == null ^ getVectorCol() == null, "Must either set featureCols or vectorCol!");

    // Wildcard type instead of the raw BatchOperator keeps generic type checking enabled.
    BatchOperator<?> in = checkAndGetFirst(inputs);

    if (null != getFeatureCols()) {
        in = new VectorAssemblerBatchOp()
            .setSelectedCols(getFeatureCols())
            .setOutputCol(VECTOR_COL)
            .setReservedCols(getLabelCol())
            .linkFrom(in);
        // NOTE: mutates this operator's params, so getVectorCol() below returns VECTOR_COL.
        this.setVectorCol(VECTOR_COL);
    }

    VectorNearestNeighborTrainBatchOp train = new VectorNearestNeighborTrainBatchOp(getParams())
        .setMetric(getDistanceType().name())
        .setSelectedCol(getVectorCol())
        .setIdCol(getLabelCol())
        .linkFrom(in);

    this.setOutput(train.getDataSet(), train.getSchema());
    return this;
}
Aggregations