Use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.
In class MinMaxScalerModelConverter, method encodeFeatures.
/**
 * Encodes the fitted min-max scaler as one continuous {@code DOUBLE} derived field per input feature.
 *
 * <p>For a feature whose fitted range is non-empty the generated expression is
 * {@code (x - originalMin) / (originalMax - originalMin)}, followed by a multiplication with
 * {@code (max - min)} and an addition of {@code min}; either step is omitted when it would be a
 * no-op (factor of one, constant of zero).
 *
 * <p>For a feature whose fitted minimum equals its fitted maximum the division above would be a
 * division by zero. Spark ML defines the unscaled value as 0.5 in that case, so a constant
 * {@code 0.5 * (max - min) + min} is emitted instead.
 *
 * @param encoder the encoder that supplies the input features and registers the derived fields
 * @return one {@link ContinuousFeature} per input feature, in input order
 * @throws IllegalArgumentException if the size of the fitted {@code originalMax} or
 *         {@code originalMin} vector differs from the number of input features
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    MinMaxScalerModel transformer = getTransformer();
    // Target range parameters: the output range is [min, max].
    double rescaleFactor = (transformer.getMax() - transformer.getMin());
    double rescaleConstant = transformer.getMin();
    List<Feature> features = encoder.getFeatures(transformer.getInputCol());
    Vector originalMax = transformer.originalMax();
    if (originalMax.size() != features.size()) {
        throw new IllegalArgumentException("Expected " + features.size() + " originalMax element(s), got " + originalMax.size());
    }
    Vector originalMin = transformer.originalMin();
    if (originalMin.size() != features.size()) {
        throw new IllegalArgumentException("Expected " + features.size() + " originalMin element(s), got " + originalMin.size());
    }
    List<Feature> result = new ArrayList<>();
    for (int i = 0; i < features.size(); i++) {
        Feature feature = features.get(i);
        // Still converted for constant columns so that a non-continuous input is rejected as before.
        ContinuousFeature continuousFeature = feature.toContinuousFeature();
        double max = originalMax.apply(i);
        double min = originalMin.apply(i);
        Expression expression;
        if (max == min) {
            // Constant column: avoid emitting a division by zero. Spark ML uses 0.5 as the
            // unscaled value here, which is then mapped onto the target range.
            expression = PMMLUtil.createConstant(0.5d * rescaleFactor + rescaleConstant);
        } else {
            expression = PMMLUtil.createApply("/", PMMLUtil.createApply("-", continuousFeature.ref(), PMMLUtil.createConstant(min)), PMMLUtil.createConstant(max - min));
            if (!ValueUtil.isOne(rescaleFactor)) {
                expression = PMMLUtil.createApply("*", expression, PMMLUtil.createConstant(rescaleFactor));
            }
            if (!ValueUtil.isZero(rescaleConstant)) {
                expression = PMMLUtil.createApply("+", expression, PMMLUtil.createConstant(rescaleConstant));
            }
        }
        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i), OpType.CONTINUOUS, DataType.DOUBLE, expression);
        result.add(new ContinuousFeature(encoder, derivedField));
    }
    return result;
}
Use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.
In class PCAModelConverter, method encodeFeatures.
/**
 * Encodes the fitted PCA model as {@code k} continuous {@code DOUBLE} derived fields.
 *
 * <p>Output field {@code i} is a {@code sum} apply over all input features, where input
 * {@code j} contributes its reference multiplied by {@code pc(j, i)}; the multiplication is
 * left out when the coefficient is one.
 *
 * @param encoder the encoder that supplies the input features and registers the derived fields
 * @return one {@link ContinuousFeature} per principal component, in component order
 * @throws IllegalArgumentException if the row count of the component matrix differs from the
 *         number of input features
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    PCAModel transformer = getTransformer();
    List<Feature> inputs = encoder.getFeatures(transformer.getInputCol());
    DenseMatrix components = transformer.pc();
    if (components.numRows() != inputs.size()) {
        throw new IllegalArgumentException();
    }

    List<Feature> projected = new ArrayList<>();
    for (int component = 0; component < transformer.getK(); component++) {
        Apply weightedSum = new Apply("sum");

        int row = 0;
        for (Feature input : inputs) {
            Expression term = input.toContinuousFeature().ref();
            Double weight = components.apply(row, component);
            // A weight of one needs no explicit multiplication.
            if (!ValueUtil.isOne(weight)) {
                term = PMMLUtil.createApply("*", term, PMMLUtil.createConstant(weight));
            }
            weightedSum.addExpressions(term);
            row++;
        }

        DerivedField componentField = encoder.createDerivedField(formatName(transformer, component), OpType.CONTINUOUS, DataType.DOUBLE, weightedSum);
        projected.add(new ContinuousFeature(encoder, componentField));
    }
    return projected;
}
Use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.
In class BucketizerConverter, method encodeFeatures.
/**
 * Encodes the bucketizer as a single categorical {@code INTEGER} derived field backed by a
 * {@code Discretize} expression.
 *
 * <p>An array of {@code n + 1} splits yields {@code n} bins. Bin {@code i} spans
 * {@code splits[i]} to {@code splits[i + 1]} and is labelled with the decimal string of
 * {@code i}. Every bin is closed on the left and open on the right, except the last one, which
 * is closed on both sides. Margin values are passed through {@code formatMargin}.
 *
 * @param encoder the encoder that supplies the input feature and registers the derived field
 * @return a singleton list holding the {@link CategoricalFeature} for the bin index
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Bucketizer transformer = getTransformer();
    ContinuousFeature input = encoder.getOnlyFeature(transformer.getInputCol()).toContinuousFeature();
    Discretize discretize = new Discretize(input.getName());

    List<String> binLabels = new ArrayList<>();
    double[] splits = transformer.getSplits();
    int binCount = splits.length - 1;
    int lastBin = binCount - 1;
    for (int bin = 0; bin < binCount; bin++) {
        String label = String.valueOf(bin);
        binLabels.add(label);

        // Only the final bin includes its right margin.
        Interval.Closure closure;
        if (bin == lastBin) {
            closure = Interval.Closure.CLOSED_CLOSED;
        } else {
            closure = Interval.Closure.CLOSED_OPEN;
        }

        Interval interval = new Interval(closure)
            .setLeftMargin(formatMargin(splits[bin]))
            .setRightMargin(formatMargin(splits[bin + 1]));
        discretize.addDiscretizeBins(new DiscretizeBin(label, interval));
    }

    DerivedField binField = encoder.createDerivedField(formatName(transformer), OpType.CATEGORICAL, DataType.INTEGER, discretize);
    Feature binFeature = new CategoricalFeature(encoder, binField, binLabels);
    return Collections.singletonList(binFeature);
}
Use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.
In class StringIndexerModelConverter, method encodeFeatures.
/**
 * Encodes the fitted string indexer as a single categorical feature over the model's labels.
 *
 * <p>The input field is turned into a categorical field via {@code encoder.toCategorical}.
 * When that field is a {@code DataField}, an invalid-value decorator is registered for it:
 * {@code "keep"} maps to {@code AS_IS} and {@code "error"} maps to {@code RETURN_INVALID}.
 * When it is a {@code DerivedField}, no decorator is registered.
 *
 * <p>For {@code "keep"}, an additional derived field is created that yields the input value
 * when it is one of the known labels and {@code LABEL_UNKNOWN} otherwise; {@code LABEL_UNKNOWN}
 * is appended to the category list and the returned feature is based on that derived field.
 *
 * @param encoder the encoder that supplies the input feature and registers fields and decorators
 * @return a singleton list holding the resulting {@link CategoricalFeature}
 * @throws IllegalArgumentException if the categorical field is a {@code DataField} and the
 *         {@code handleInvalid} setting is neither {@code "keep"} nor {@code "error"}, or if the
 *         categorical field is neither a {@code DataField} nor a {@code DerivedField}
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    StringIndexerModel transformer = getTransformer();
    Feature input = encoder.getOnlyFeature(transformer.getInputCol());

    // Mutable copy: the "keep" branch below appends an extra category to this same list.
    List<String> labels = new ArrayList<>(Arrays.asList(transformer.labels()));
    String invalidPolicy = transformer.getHandleInvalid();

    Field<?> indexedField = encoder.toCategorical(input.getName(), labels);
    if (indexedField instanceof DataField) {
        DataField inputField = (DataField) indexedField;

        InvalidValueTreatmentMethod treatment;
        switch (invalidPolicy) {
            case "keep":
                treatment = InvalidValueTreatmentMethod.AS_IS;
                break;
            case "error":
                treatment = InvalidValueTreatmentMethod.RETURN_INVALID;
                break;
            default:
                throw new IllegalArgumentException(invalidPolicy);
        }

        InvalidValueDecorator decorator = new InvalidValueDecorator().setInvalidValueTreatment(treatment);
        encoder.addDecorator(inputField.getName(), decorator);
    } else if (!(indexedField instanceof DerivedField)) {
        throw new IllegalArgumentException();
    }
    // A DerivedField needs no decorator and falls through to here untouched.

    switch (invalidPolicy) {
        case "keep": {
            // isIn(x, label_1, ..., label_n)
            Apply isKnownLabel = PMMLUtil.createApply("isIn", input.ref());
            for (String label : labels) {
                isKnownLabel.addExpressions(PMMLUtil.createConstant(label, input.getDataType()));
            }
            labels.add(StringIndexerModelConverter.LABEL_UNKNOWN);

            // if(isIn(...), x, LABEL_UNKNOWN)
            Apply knownOrUnknown = PMMLUtil.createApply("if", isKnownLabel, input.ref(), PMMLUtil.createConstant(StringIndexerModelConverter.LABEL_UNKNOWN, DataType.STRING));
            indexedField = encoder.createDerivedField(FeatureUtil.createName("handleInvalid", input), OpType.CATEGORICAL, input.getDataType(), knownOrUnknown);
            break;
        }
        default:
            break;
    }

    Feature indexed = new CategoricalFeature(encoder, indexedField, labels);
    return Collections.singletonList(indexed);
}
Use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.
In class TokenizerConverter, method encodeFeatures.
/**
 * Encodes the tokenizer as a lowercased categorical {@code STRING} derived field wrapped in a
 * {@link DocumentFeature}.
 *
 * <p>The derived field applies the {@code lowercase} function to the input feature. The
 * {@code DocumentFeature} is constructed with the regular expression {@code \s+}
 * (presumably the token separator pattern; confirm against {@code DocumentFeature}).
 *
 * @param encoder the encoder that supplies the input feature and registers the derived field
 * @return a singleton list holding the {@link DocumentFeature}
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Tokenizer transformer = getTransformer();
    Feature input = encoder.getOnlyFeature(transformer.getInputCol());

    Apply toLowerCase = PMMLUtil.createApply("lowercase", input.ref());
    DerivedField lowercased = encoder.createDerivedField(
        FeatureUtil.createName("lowercase", input),
        OpType.CATEGORICAL,
        DataType.STRING,
        toLowerCase);

    Feature document = new DocumentFeature(encoder, lowercased, "\\s+");
    return Collections.singletonList(document);
}
Aggregations