Use of com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler in project Alink by alibaba.
From the class PipelineSaveAndLoadTest, method testLocalPredictor.
/**
 * Fits a two-stage pipeline (vector assembly followed by a multilayer perceptron
 * classifier) on {@code data}, saves the fitted model to a newly created file, and then
 * runs a single hand-built row through a {@code LocalPredictor} loaded from that file.
 *
 * <p>The mapped row is only printed as JSON; this test makes no assertions on it.
 *
 * @throws Exception if fitting, saving, executing, loading or predicting fails
 */
@Test
public void testLocalPredictor() throws Exception {
    // Stage 1: pack the Iris feature columns into one vector column.
    VectorAssembler assembler = new VectorAssembler()
        .setSelectedCols(Iris.getFeatureColNames())
        .setOutputCol("features");

    // Stage 2: classifier reading that vector column; only the label column is reserved.
    MultilayerPerceptronClassifier mlp = new MultilayerPerceptronClassifier()
        .setVectorCol("features")
        .setLabelCol(Iris.getLabelColName())
        .setLayers(new int[] { 4, 5, 3 })
        .setMaxIter(30)
        .setPredictionCol("pred_label")
        .setPredictionDetailCol("pred_detail")
        .setReservedCols(Iris.getLabelColName());

    PipelineModel fitted = new Pipeline().add(assembler).add(mlp).fit(data);

    // Persist the model (second argument is passed as true, as in the save call's
    // signature), then trigger execution so the file is written before it is read back.
    FilePath modelPath = new FilePath(folder.newFile().getAbsolutePath());
    fitted.save(modelPath, true);
    BatchOperator.execute();

    // The predictor's input schema is the training schema plus a trailing dense-vector
    // "features" column.
    TableSchema inputSchema = new TableSchema(
        ArrayUtils.add(data.getColNames(), "features"),
        ArrayUtils.add(data.getColTypes(), VectorTypes.DENSE_VECTOR));
    LocalPredictor predictor = new LocalPredictor(modelPath, inputSchema);

    DenseVector sampleVector = new DenseVector(new double[] { 5.1, 3.5, 1.4, 0.2 });
    Row sample = Row.of(5.1, 3.5, 1.4, 0.2, "Iris-setosanew", sampleVector);
    Row prediction = predictor.map(sample);

    System.out.println(JsonConverter.toJson(prediction));
}
Use of com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler in project Alink by alibaba.
From the class LogisticRegressionMixVecTest, method batchMixVecTest12.
/**
 * Fits and applies a pipeline that assembles the columns
 * {@code svec, vec, f0, f1, f2, f3} into {@code allvec} and trains a logistic regression
 * with an intercept on it, reserving both {@code labels} and {@code allvec} in the output.
 *
 * <p>The transformed result is collected but not inspected.
 */
@Test
public void batchMixVecTest12() {
    BatchOperator<?> source = (BatchOperator<?>) getData();

    VectorAssembler assembler = new VectorAssembler()
        .setSelectedCols(new String[] { "svec", "vec", "f0", "f1", "f2", "f3" })
        .setOutputCol("allvec");

    LogisticRegression regression = new LogisticRegression()
        .setVectorCol("allvec")
        .setWithIntercept(true)
        .setReservedCols(new String[] { "labels", "allvec" })
        .setLabelCol("labels")
        .setPredictionCol("pred");

    PipelineModel fitted = new Pipeline().add(assembler).add(regression).fit(source);

    // Collecting forces the transform to actually run.
    fitted.transform(source).collect();
}
Use of com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler in project Alink by alibaba.
From the class LogisticRegressionMixVecTest, method batchMixVecTest13.
/**
 * Fits and applies a pipeline that assembles the columns
 * {@code svec, vec, f0, f1, f2, f3} into {@code allvec} and trains a logistic regression
 * on it with both the intercept and standardization switched off, reserving only
 * {@code labels} in the output.
 *
 * <p>The transformed result is collected but not inspected.
 */
@Test
public void batchMixVecTest13() {
    BatchOperator<?> source = (BatchOperator<?>) getData();

    VectorAssembler assembler = new VectorAssembler()
        .setSelectedCols(new String[] { "svec", "vec", "f0", "f1", "f2", "f3" })
        .setOutputCol("allvec");

    LogisticRegression regression = new LogisticRegression()
        .setVectorCol("allvec")
        .setWithIntercept(false)
        .setStandardization(false)
        .setLabelCol("labels")
        .setReservedCols(new String[] { "labels" })
        .setPredictionCol("pred");

    PipelineModel fitted = new Pipeline().add(assembler).add(regression).fit(source);

    // Collecting forces the transform to actually run.
    fitted.transform(source).collect();
}
Use of com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler in project Alink by alibaba.
From the class LogisticRegressionMixVecTest, method batchMixVecTest11.
/**
 * Fits and applies a pipeline that assembles the columns
 * {@code svec, vec, f0, f1, f2, f3} into {@code allvec} and trains a logistic regression
 * on it with an intercept but without standardization, reserving only {@code labels}
 * in the output.
 *
 * <p>The transformed result is collected but not inspected.
 */
@Test
public void batchMixVecTest11() {
    BatchOperator<?> source = (BatchOperator<?>) getData();

    VectorAssembler assembler = new VectorAssembler()
        .setSelectedCols(new String[] { "svec", "vec", "f0", "f1", "f2", "f3" })
        .setOutputCol("allvec");

    LogisticRegression regression = new LogisticRegression()
        .setVectorCol("allvec")
        .setWithIntercept(true)
        .setReservedCols(new String[] { "labels" })
        .setStandardization(false)
        .setLabelCol("labels")
        .setPredictionCol("pred");

    PipelineModel fitted = new Pipeline().add(assembler).add(regression).fit(source);

    // Collecting forces the transform to actually run.
    fitted.transform(source).collect();
}
Use of com.alibaba.alink.pipeline.dataproc.vector.VectorAssembler in project Alink by alibaba.
From the class FmRecommTrainBatchOp, method createFeatureVectors.
/**
 * Collapses the given feature columns of a table into a single column named
 * {@code __fm_features__}, keeping {@code idCol} alongside it.
 *
 * <p>When any categorical columns are supplied they are first one-hot encoded (without
 * dropping the last category) into {@code __fm_features__}; that column is then assembled
 * together with the remaining feature columns. The assembled column is finally passed
 * through {@code ConvertVec}.
 *
 * @param featureTable    table holding the id column and the feature columns
 * @param idCol           column reserved by the vector assembler
 * @param featureCols     all feature columns to use
 * @param categoricalCols the subset of {@code featureCols} to one-hot encode; may be empty
 * @return the table after assembling and converting the feature vector
 */
private static BatchOperator<?> createFeatureVectors(BatchOperator<?> featureTable, String idCol, String[] featureCols, String[] categoricalCols) {
    final String vectorCol = "__fm_features__";

    TableUtil.assertSelectedColExist(featureCols, categoricalCols);

    // NOTE(review): subtract(...) is defined elsewhere; presumably it yields the feature
    // columns that are not categorical -- confirm against its definition.
    String[] colsToAssemble = subtract(featureCols, categoricalCols);
    final Long envId = featureTable.getMLEnvironmentId();

    BatchOperator<?> table = featureTable;

    boolean hasCategorical = categoricalCols.length > 0;
    if (hasCategorical) {
        OneHotEncoder encoder = new OneHotEncoder()
            .setMLEnvironmentId(envId)
            .setSelectedCols(categoricalCols)
            .setOutputCols(vectorCol)
            .setDropLast(false);
        table = encoder.fit(table).transform(table);
        // The encoded column joins the columns fed to the assembler below.
        colsToAssemble = (String[]) ArrayUtils.add(colsToAssemble, vectorCol);
    }

    VectorAssembler assembler = new VectorAssembler()
        .setMLEnvironmentId(envId)
        .setSelectedCols(colsToAssemble)
        .setOutputCol(vectorCol)
        .setReservedCols(idCol);
    table = assembler.transform(table);

    // Rewrite the assembled column in place through the ConvertVec UDF.
    return table.udf(vectorCol, vectorCol, new ConvertVec());
}
Aggregations