use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.
the class MinMaxScalerModelConverter method encodeFeatures.
/**
 * Encodes the min-max rescaling of every input feature as a PMML derived field.
 *
 * <p>Each element is mapped to {@code (x - originalMin) / (originalMax - originalMin)}, then
 * multiplied by {@code (max - min)} and shifted by {@code min}. The multiplication and the
 * addition are omitted when the factor is one or the constant is zero, respectively.
 *
 * <p>When an element has a zero original range ({@code originalMax == originalMin}) the division
 * would be by zero. Spark ML's MinMaxScaler defines the rescaled value for that case as
 * {@code 0.5 * (max + min)}, so the unit-range term is encoded as the constant 0.5 instead.
 *
 * @param encoder the encoder that supplies the input features and creates the derived fields
 * @return one continuous feature per input feature, in input order
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    MinMaxScalerModel transformer = getTransformer();

    double rescaleFactor = (transformer.getMax() - transformer.getMin());
    double rescaleConstant = transformer.getMin();

    // These checks do not depend on the element index, so evaluate them once.
    boolean applyFactor = !ValueUtil.isOne(rescaleFactor);
    boolean applyConstant = !ValueUtil.isZero(rescaleConstant);

    Vector originalMin = transformer.originalMin();
    Vector originalMax = transformer.originalMax();

    List<Feature> features = encoder.getFeatures(transformer.getInputCol());

    SchemaUtil.checkSize(Math.max(originalMin.size(), originalMax.size()), features);

    List<Feature> result = new ArrayList<>();

    for (int i = 0, length = features.size(); i < length; i++) {
        Feature feature = features.get(i);

        ContinuousFeature continuousFeature = feature.toContinuousFeature();

        double min = originalMin.apply(i);
        double max = originalMax.apply(i);
        double range = (max - min);

        Expression expression;

        if (range == 0d) {
            // Constant column: avoid dividing by zero and use the value Spark substitutes.
            expression = PMMLUtil.createConstant(0.5d);
        } else {
            expression = PMMLUtil.createApply(PMMLFunctions.DIVIDE, PMMLUtil.createApply(PMMLFunctions.SUBTRACT, continuousFeature.ref(), PMMLUtil.createConstant(min)), PMMLUtil.createConstant(range));
        }

        if (applyFactor) {
            expression = PMMLUtil.createApply(PMMLFunctions.MULTIPLY, expression, PMMLUtil.createConstant(rescaleFactor));
        }

        if (applyConstant) {
            expression = PMMLUtil.createApply(PMMLFunctions.ADD, expression, PMMLUtil.createConstant(rescaleConstant));
        }

        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i, length), OpType.CONTINUOUS, DataType.DOUBLE, expression);

        result.add(new ContinuousFeature(encoder, derivedField));
    }

    return result;
}
use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.
the class SQLTransformerConverter method encodeLogicalPlan.
/**
 * Translates the expressions of a logical plan into PMML fields.
 *
 * <p>Child plans are encoded first, recursively; the lists they return are not merged into this
 * call's result. Each expression of {@code logicalPlan} is then translated to a PMML expression:
 * <ul>
 *   <li>a plain field reference that {@code ensureField} resolves to a non-null field is returned
 *       as that field;</li>
 *   <li>anything else becomes a new derived field, named after the alias when the translated
 *       expression is an {@code AliasExpression}, or after the formatted expression with an
 *       {@code "sql"} prefix otherwise.</li>
 * </ul>
 *
 * @param encoder the encoder used for translation, field lookup and derived field creation
 * @param logicalPlan the plan whose expressions are encoded
 * @return the fields that correspond to the expressions of {@code logicalPlan}, in order
 */
public static List<Field<?>> encodeLogicalPlan(SparkMLEncoder encoder, LogicalPlan logicalPlan) {
    List<Field<?>> fields = new ArrayList<>();

    // Children go first; only their effect on the encoder is used, not their return value.
    List<LogicalPlan> childPlans = JavaConversions.seqAsJavaList(logicalPlan.children());
    for (LogicalPlan childPlan : childPlans) {
        encodeLogicalPlan(encoder, childPlan);
    }

    List<Expression> sqlExpressions = JavaConversions.seqAsJavaList(logicalPlan.expressions());
    for (Expression sqlExpression : sqlExpressions) {
        org.dmg.pmml.Expression translated = ExpressionTranslator.translate(encoder, sqlExpression);

        // A bare reference to a resolvable field needs no derived field of its own.
        if (translated instanceof FieldRef) {
            Field<?> existing = ensureField(encoder, ((FieldRef) translated).getField());
            if (existing != null) {
                fields.add(existing);
                continue;
            }
        }

        FieldName derivedName = (translated instanceof AliasExpression)
            ? FieldName.create(((AliasExpression) translated).getName())
            : FieldNameUtil.create("sql", ExpressionUtil.format(sqlExpression));

        DataType dataType = DatasetUtil.translateDataType(sqlExpression.dataType());
        OpType opType = ExpressionUtil.getOpType(dataType);

        org.dmg.pmml.Expression body = AliasExpression.unwrap(translated);

        // Call ensureField for every field referenced inside the expression before deriving from it.
        Visitor fieldRegistrar = new AbstractVisitor() {

            @Override
            public VisitorAction visit(FieldRef fieldRef) {
                ensureField(encoder, fieldRef.getField());

                return super.visit(fieldRef);
            }
        };
        fieldRegistrar.applyTo(body);

        fields.add(encoder.createDerivedField(derivedName, opType, dataType, body));
    }

    return fields;
}
use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.
the class TokenizerConverter method encodeFeatures.
/**
 * Encodes the tokenizer as a lowercased string field wrapped in a document feature.
 *
 * <p>The single input feature is passed through the PMML {@code lowercase} function, and the
 * resulting derived field is paired with the {@code \s+} pattern handed to
 * {@code DocumentFeature}.
 *
 * @param encoder the encoder that supplies the input feature and creates the derived field
 * @return a single-element list holding the document feature
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Tokenizer tokenizer = getTransformer();

    Feature input = encoder.getOnlyFeature(tokenizer.getInputCol());

    Apply lowercased = PMMLUtil.createApply(PMMLFunctions.LOWERCASE, input.ref());

    DerivedField lowercasedField = encoder.createDerivedField(FieldNameUtil.create("lowercase", input), OpType.CATEGORICAL, DataType.STRING, lowercased);

    Feature document = new DocumentFeature(encoder, lowercasedField, "\\s+");

    return Collections.singletonList(document);
}
use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.
the class BucketizerConverter method encodeFeatures.
/**
 * Encodes every bucketized input column as a PMML {@code Discretize} derived field.
 *
 * <p>In single-column mode the transformer's one splits array is used; in multi-column mode
 * there is one splits array per input column. For {@code n} split points, {@code n - 1} bins
 * are produced with integer categories {@code 0 .. n - 2}. Every bin is closed on the left and
 * open on the right, except the last one, which is closed on both sides.
 *
 * @param encoder the encoder that supplies the input features and creates the derived fields
 * @return one index feature per input column, in input column order
 * @throws IllegalArgumentException if the input mode is neither single nor multiple, or if
 *         there are fewer splits arrays than input columns
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Bucketizer transformer = getTransformer();

    InOutMode inputMode = getInputMode();

    String[] inputCols;
    double[][] splitsArray;

    if ((InOutMode.SINGLE).equals(inputMode)) {
        inputCols = inputMode.getInputCols(transformer);
        splitsArray = new double[][] { transformer.getSplits() };
    } else if ((InOutMode.MULTIPLE).equals(inputMode)) {
        inputCols = inputMode.getInputCols(transformer);
        splitsArray = transformer.getSplitsArray();
    } else {
        throw new IllegalArgumentException("Unsupported input mode: " + inputMode);
    }

    // Fail with a clear message instead of an index error further down.
    if (splitsArray.length < inputCols.length) {
        throw new IllegalArgumentException("Expected " + inputCols.length + " splits array(s), got " + splitsArray.length);
    }

    List<Feature> result = new ArrayList<>();

    for (int i = 0; i < inputCols.length; i++) {
        String inputCol = inputCols[i];
        double[] splits = splitsArray[i];

        Feature feature = encoder.getOnlyFeature(inputCol);

        ContinuousFeature continuousFeature = feature.toContinuousFeature();

        Discretize discretize = new Discretize(continuousFeature.getName()).setDataType(DataType.INTEGER);

        List<Integer> categories = new ArrayList<>();

        int lastBin = splits.length - 2;

        for (int j = 0; j <= lastBin; j++) {
            Integer category = j;

            categories.add(category);

            // Only the last bin includes its right margin.
            Interval.Closure closure = (j < lastBin) ? Interval.Closure.CLOSED_OPEN : Interval.Closure.CLOSED_CLOSED;

            Interval interval = new Interval(closure).setLeftMargin(formatMargin(splits[j])).setRightMargin(formatMargin(splits[j + 1]));

            DiscretizeBin discretizeBin = new DiscretizeBin(category, interval);

            discretize.addDiscretizeBins(discretizeBin);
        }

        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i), OpType.CATEGORICAL, DataType.INTEGER, discretize);

        result.add(new IndexFeature(encoder, derivedField, categories));
    }

    return result;
}
Aggregations