Use of org.jpmml.converter.CategoricalFeature in project jpmml-r by jpmml.
Class RExpEncoder, method addFeature.
/**
 * Wraps the given field in a feature that matches its operational type and
 * registers that feature via {@code addFeature(Feature)}.
 *
 * <p>A {@code CATEGORICAL} field is cast to {@code DataField} and wrapped in a
 * {@code CategoricalFeature}; a {@code CONTINUOUS} field is wrapped in a
 * {@code ContinuousFeature}.
 *
 * @param field the field to register
 * @throws IllegalArgumentException if the field's operational type is neither
 *         {@code CATEGORICAL} nor {@code CONTINUOUS}
 */
public void addFeature(Field<?> field) {
    OpType opType = field.getOpType();

    Feature feature;
    switch (opType) {
        case CATEGORICAL:
            feature = new CategoricalFeature(this, (DataField) field);
            break;
        case CONTINUOUS:
            feature = new ContinuousFeature(this, field);
            break;
        default:
            // Name the rejected type so the failure can be diagnosed from the stack trace alone.
            throw new IllegalArgumentException("Unsupported operational type: " + opType);
    }

    addFeature(feature);
}
Use of org.jpmml.converter.CategoricalFeature in project jpmml-sparkml by jpmml.
Class IndexToStringConverter, method encodeFeatures.
/**
 * Encodes the {@code IndexToString} transformer as a single categorical feature.
 *
 * <p>A categorical string data field is created on the encoder, named by
 * {@code formatName(transformer)}, with the transformer's labels as its values.
 *
 * @param encoder the encoder on which the data field is created
 * @return a one-element list holding the {@code CategoricalFeature} for that field
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    IndexToString indexToString = getTransformer();

    // The transformer's labels become the values of the new data field.
    List<String> labels = Arrays.asList(indexToString.getLabels());
    DataField labelField = encoder.createDataField(formatName(indexToString), OpType.CATEGORICAL, DataType.STRING, labels);

    Feature labelFeature = new CategoricalFeature(encoder, labelField);
    return Collections.singletonList(labelFeature);
}
Use of org.jpmml.converter.CategoricalFeature in project jpmml-sparkml by jpmml.
Class OneHotEncoderConverter, method encodeFeatures.
/**
 * Expands the categorical input column into one {@code BinaryFeature} per category value.
 *
 * <p>If the transformer's {@code dropLast} parameter is defined, its value is used;
 * otherwise it is treated as {@code true}. When {@code dropLast} is in effect, the
 * last category value produces no binary feature.
 *
 * @param encoder the encoder that supplies the input column's feature, which is
 *        cast to {@code CategoricalFeature}
 * @return the binary features, in the order of the category values
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    OneHotEncoder oneHotEncoder = getTransformer();

    // Fall back to true when the parameter has not been set.
    Option<Object> dropLastParam = oneHotEncoder.get(oneHotEncoder.dropLast());
    boolean dropLast = dropLastParam.isDefined() ? (Boolean) dropLastParam.get() : true;

    CategoricalFeature inputFeature = (CategoricalFeature) encoder.getOnlyFeature(oneHotEncoder.getInputCol());
    List<String> categories = inputFeature.getValues();

    // Number of leading categories that receive a binary feature.
    int encodedCount = dropLast ? (categories.size() - 1) : categories.size();

    List<Feature> binaryFeatures = new ArrayList<>();
    for (String category : categories.subList(0, encodedCount)) {
        Feature binaryFeature = new BinaryFeature(encoder, inputFeature.getName(), DataType.STRING, category);
        binaryFeatures.add(binaryFeature);
    }

    return binaryFeatures;
}
Use of org.jpmml.converter.CategoricalFeature in project jpmml-sparkml by jpmml.
Class BucketizerConverter, method encodeFeatures.
/**
 * Encodes the {@code Bucketizer} transformer as a {@code Discretize} expression
 * over the continuous input column.
 *
 * <p>Each pair of adjacent split points becomes one bin whose value is the bin
 * index rendered as a string. Every bin is closed on the left and open on the
 * right, except the last one, which is closed on both sides.
 *
 * @param encoder the encoder that supplies the input feature and receives the
 *        derived field
 * @return a one-element list holding a {@code CategoricalFeature} over the derived
 *         field, with the bin indices as its categories
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Bucketizer bucketizer = getTransformer();

    Feature inputFeature = encoder.getOnlyFeature(bucketizer.getInputCol());
    ContinuousFeature continuousInput = inputFeature.toContinuousFeature();

    Discretize discretize = new Discretize(continuousInput.getName());

    double[] splits = bucketizer.getSplits();
    int bucketCount = splits.length - 1;

    List<String> bucketLabels = new ArrayList<>();
    for (int bucket = 0; bucket < bucketCount; bucket++) {
        String bucketLabel = String.valueOf(bucket);
        bucketLabels.add(bucketLabel);

        // Only the final bucket includes its upper bound.
        Interval.Closure closure;
        if (bucket < (bucketCount - 1)) {
            closure = Interval.Closure.CLOSED_OPEN;
        } else {
            closure = Interval.Closure.CLOSED_CLOSED;
        }

        Interval interval = new Interval(closure)
            .setLeftMargin(formatMargin(splits[bucket]))
            .setRightMargin(formatMargin(splits[bucket + 1]));

        discretize.addDiscretizeBins(new DiscretizeBin(bucketLabel, interval));
    }

    DerivedField bucketField = encoder.createDerivedField(formatName(bucketizer), OpType.CATEGORICAL, DataType.INTEGER, discretize);

    Feature bucketFeature = new CategoricalFeature(encoder, bucketField, bucketLabels);
    return Collections.singletonList(bucketFeature);
}
Use of org.jpmml.converter.CategoricalFeature in project jpmml-sparkml by jpmml.
Class ClassificationModelConverter, method registerOutputFields.
/**
 * Declares the output fields of a classification model and registers the
 * corresponding features with the encoder.
 *
 * <p>The returned list contains, in this order:
 * <ol>
 * <li>a predicted field named {@code pmml(<predictionCol>)}, typed after the label;</li>
 * <li>a transformed-value field named {@code <predictionCol>}, declared as a
 * categorical double, which maps each label value to its index (rendered as a
 * string) through an inline table;</li>
 * <li>if the model implements {@code HasProbabilityCol}, one probability field per
 * label value, named {@code <probabilityCol>(<value>)}.</li>
 * </ol>
 *
 * <p>The prediction column is registered on the encoder as a categorical feature
 * whose {@code toContinuousFeature()} yields a continuous feature with the same
 * name and data type. The probability column, when present, is registered as a
 * list of continuous features.
 *
 * @param label the model label; cast to {@code CategoricalLabel}
 * @param encoder the encoder that receives the prediction and probability features
 * @return the output fields in the order described above
 */
@Override
public List<OutputField> registerOutputFields(Label label, SparkMLEncoder encoder) {
    T model = getTransformer();
    CategoricalLabel categoricalLabel = (CategoricalLabel) label;

    List<OutputField> outputFields = new ArrayList<>();

    String predictionCol = model.getPredictionCol();

    // Raw PMML prediction, carrying the original label value.
    FieldName pmmlPredictedName = FieldName.create("pmml(" + predictionCol + ")");
    OutputField pmmlPredictedField = ModelUtil.createPredictedField(pmmlPredictedName, categoricalLabel.getDataType(), OpType.CATEGORICAL);
    outputFields.add(pmmlPredictedField);

    // Lookup table from label value ("input") to label index ("output").
    List<String> columns = Arrays.asList("input", "output");
    List<String> indexCategories = new ArrayList<>();
    DocumentBuilder documentBuilder = DOMUtil.createDocumentBuilder();
    InlineTable inlineTable = new InlineTable();

    for (int index = 0; index < categoricalLabel.size(); index++) {
        String labelValue = categoricalLabel.getValue(index);
        String indexCategory = String.valueOf(index);

        indexCategories.add(indexCategory);

        Row row = DOMUtil.createRow(documentBuilder, columns, Arrays.asList(labelValue, indexCategory));
        inlineTable.addRows(row);
    }

    MapValues mapValues = new MapValues()
        .addFieldColumnPairs(new FieldColumnPair(pmmlPredictedField.getName(), columns.get(0)))
        .setOutputColumn(columns.get(1))
        .setInlineTable(inlineTable);

    // Prediction column: the label index obtained by translating the raw prediction.
    final OutputField predictedField = new OutputField(FieldName.create(predictionCol), DataType.DOUBLE)
        .setOpType(OpType.CATEGORICAL)
        .setResultFeature(ResultFeature.TRANSFORMED_VALUE)
        .setExpression(mapValues);
    outputFields.add(predictedField);

    Feature predictionFeature = new CategoricalFeature(encoder, predictedField.getName(), predictedField.getDataType(), indexCategories) {

        @Override
        public ContinuousFeature toContinuousFeature() {
            PMMLEncoder pmmlEncoder = ensureEncoder();

            return new ContinuousFeature(pmmlEncoder, getName(), getDataType());
        }
    };
    encoder.putOnlyFeature(predictionCol, predictionFeature);

    if (model instanceof HasProbabilityCol) {
        String probabilityCol = ((HasProbabilityCol) model).getProbabilityCol();

        List<Feature> probabilityFeatures = new ArrayList<>();
        for (int index = 0; index < categoricalLabel.size(); index++) {
            String labelValue = categoricalLabel.getValue(index);

            FieldName probabilityName = FieldName.create(probabilityCol + "(" + labelValue + ")");
            OutputField probabilityField = ModelUtil.createProbabilityField(probabilityName, DataType.DOUBLE, labelValue);
            outputFields.add(probabilityField);

            probabilityFeatures.add(new ContinuousFeature(encoder, probabilityField.getName(), probabilityField.getDataType()));
        }

        encoder.putFeatures(probabilityCol, probabilityFeatures);
    }

    return outputFields;
}
Aggregations