Use of org.apache.spark.ml.feature.OneHotEncoderModel in the jpmml-sparkml project by jpmml.
From the class OneHotEncoderModelConverter, the method encodeFeatures.
/**
 * Builds one binarized categorical feature for every input column of the
 * one-hot encoder transformer.
 *
 * <p>Each input column is looked up through the encoder and cast to a
 * {@link CategoricalFeature}. Every retained category value becomes a
 * {@link BinaryFeature}; when the transformer's {@code dropLast} flag is set,
 * the last category value is left out.
 *
 * @param encoder the encoder used to resolve input columns and to construct the new features
 * @return one {@link BinarizedCategoricalFeature} per input column, in input column order
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    OneHotEncoderModel model = getTransformer();

    String[] columns = model.getInputCols();
    boolean omitLastCategory = model.getDropLast();

    List<Feature> encoded = new ArrayList<>();

    for (String column : columns) {
        CategoricalFeature source = (CategoricalFeature) encoder.getOnlyFeature(column);

        List<String> allCategories = source.getValues();
        // With dropLast enabled the final category gets no indicator of its own.
        List<String> categories = omitLastCategory
            ? allCategories.subList(0, allCategories.size() - 1)
            : allCategories;

        List<BinaryFeature> indicators = new ArrayList<>();
        for (String category : categories) {
            BinaryFeature indicator = new BinaryFeature(encoder, source.getName(), DataType.STRING, category);
            indicators.add(indicator);
        }

        Feature binarized = new BinarizedCategoricalFeature(encoder, source.getName(), source.getDataType(), indicators);
        encoded.add(binarized);
    }

    return encoded;
}
Aggregations