use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.
the class SparkMLEncoder method putFeatures.
/**
 * Stores the features of a column, replacing any features previously stored for it.
 *
 * <p>If the column already has a non-empty feature list, the new list must be
 * compatible with it: it must have the same size, and every new feature must have
 * the same name as the existing feature at the same position.</p>
 *
 * @param column the column whose features are being stored
 * @param features the features to associate with the column
 * @throws IllegalArgumentException if the column already has features and the new
 *         list differs from them in size or in the feature name at any position
 */
public void putFeatures(String column, List<Feature> features) {
    List<Feature> existingFeatures = this.columnFeatures.get(column);

    // Only validate when there is something to validate against
    if (existingFeatures != null && !existingFeatures.isEmpty()) {
        int expectedSize = existingFeatures.size();

        if (features.size() != expectedSize) {
            throw new IllegalArgumentException("Expected " + expectedSize + " features, got " + features.size() + " features");
        }

        for (int i = 0; i < expectedSize; i++) {
            Feature existingFeature = existingFeatures.get(i);
            Feature feature = features.get(i);

            if (!(feature.getName()).equals(existingFeature.getName())) {
                // Report which position disagrees, so the failure can be diagnosed
                throw new IllegalArgumentException("Expected feature " + existingFeature.getName() + " at index " + i + ", got feature " + feature.getName());
            }
        }
    }

    this.columnFeatures.put(column, features);
}
use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.
the class StringIndexerModelConverter method encodeFeatures.
/**
 * Encodes the input column of the string indexer as a single categorical feature.
 *
 * <p>The input feature is converted to a categorical field over the transformer's
 * labels. If that field is a {@code DataField}, an invalid value decorator is
 * registered for it according to the transformer's {@code handleInvalid} setting:
 * {@code "keep"} maps to {@code AS_IS} and {@code "error"} maps to
 * {@code RETURN_INVALID}. A {@code DerivedField} gets no decorator.</p>
 *
 * <p>When {@code handleInvalid} is {@code "keep"}, a derived field is additionally
 * created that passes values found among the labels through and maps every other
 * value to {@code LABEL_UNKNOWN}; {@code LABEL_UNKNOWN} is then appended to the
 * categories of the returned feature.</p>
 *
 * @param encoder the encoder that supplies the input feature and receives the new fields
 * @return a singleton list holding the categorical feature
 * @throws IllegalArgumentException if the categorical field is a {@code DataField} and
 *         {@code handleInvalid} is neither {@code "keep"} nor {@code "error"}, or if the
 *         categorical field is neither a {@code DataField} nor a {@code DerivedField}
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    StringIndexerModel transformer = getTransformer();

    Feature feature = encoder.getOnlyFeature(transformer.getInputCol());

    // Mutable copy, because the "keep" branch below appends the unknown label
    List<String> categories = new ArrayList<>(Arrays.asList(transformer.labels()));

    String handleInvalid = transformer.getHandleInvalid();

    Field<?> field = encoder.toCategorical(feature.getName(), categories);

    if (field instanceof DataField) {
        DataField dataField = (DataField) field;

        InvalidValueTreatmentMethod invalidValueTreatmentMethod;

        switch (handleInvalid) {
            case "keep":
                invalidValueTreatmentMethod = InvalidValueTreatmentMethod.AS_IS;
                break;
            case "error":
                invalidValueTreatmentMethod = InvalidValueTreatmentMethod.RETURN_INVALID;
                break;
            default:
                throw new IllegalArgumentException("Unsupported handleInvalid value: " + handleInvalid);
        }

        InvalidValueDecorator invalidValueDecorator = new InvalidValueDecorator()
            .setInvalidValueTreatment(invalidValueTreatmentMethod);

        encoder.addDecorator(dataField.getName(), invalidValueDecorator);
    } else if (!(field instanceof DerivedField)) {
        // Derived fields are deliberately left undecorated; anything else is unexpected
        String fieldType = (field != null ? field.getClass().getName() : "null");

        throw new IllegalArgumentException("Expected a DataField or DerivedField for the categorical field, got " + fieldType);
    }

    switch (handleInvalid) {
        case "keep":
            // isIn(x, label_1, ..., label_n) over the known labels only
            Apply setApply = PMMLUtil.createApply("isIn", feature.ref());

            for (String category : categories) {
                setApply.addExpressions(PMMLUtil.createConstant(category, feature.getDataType()));
            }

            // Appended after building the membership test, so the test excludes the unknown label
            categories.add(StringIndexerModelConverter.LABEL_UNKNOWN);

            // if(isIn(...), x, LABEL_UNKNOWN)
            Apply apply = PMMLUtil.createApply("if", setApply, feature.ref(), PMMLUtil.createConstant(StringIndexerModelConverter.LABEL_UNKNOWN, DataType.STRING));

            field = encoder.createDerivedField(FeatureUtil.createName("handleInvalid", feature), OpType.CATEGORICAL, feature.getDataType(), apply);
            break;
        default:
            break;
    }

    return Collections.<Feature>singletonList(new CategoricalFeature(encoder, field, categories));
}
use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.
the class TokenizerConverter method encodeFeatures.
/**
 * Encodes the tokenizer's input column as a single document feature.
 *
 * <p>A derived string field applying the {@code lowercase} function to the input
 * feature is created, and the returned document feature wraps that field together
 * with the {@code \s+} pattern.</p>
 *
 * @param encoder the encoder that supplies the input feature and receives the derived field
 * @return a singleton list holding the document feature
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Tokenizer tokenizer = getTransformer();

    String inputCol = tokenizer.getInputCol();
    Feature inputFeature = encoder.getOnlyFeature(inputCol);

    // Derive a lower-cased version of the input
    Apply lowercaseCall = PMMLUtil.createApply("lowercase", inputFeature.ref());
    DerivedField lowercasedField = encoder.createDerivedField(FeatureUtil.createName("lowercase", inputFeature), OpType.CATEGORICAL, DataType.STRING, lowercaseCall);

    Feature documentFeature = new DocumentFeature(encoder, lowercasedField, "\\s+");

    return Collections.singletonList(documentFeature);
}
Aggregations