Search in sources :

Example 51 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.

From the class MinMaxScalerModelConverter, method encodeFeatures.

/**
 * Encodes the fitted MinMaxScaler as one continuous PMML derived field per input feature.
 *
 * <p>For each feature the emitted expression is
 * {@code (x - originalMin) / (originalMax - originalMin)}, optionally multiplied by
 * {@code (max - min)} and shifted by {@code min} of the transformer. The multiply and add
 * steps are omitted when {@code ValueUtil} reports the factor as one / the constant as zero.
 *
 * <p>When a feature's training minimum equals its training maximum the ratio would divide by
 * zero. Spark defines the ratio as 0.5 in that case (so the final value is
 * {@code 0.5 * (max + min)}); the constant 0.5 is emitted instead of the division.
 *
 * @param encoder the encoder that supplies the input features and registers the derived fields
 * @return one {@link ContinuousFeature} per input feature, in input order
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    MinMaxScalerModel transformer = getTransformer();
    double rescaleFactor = (transformer.getMax() - transformer.getMin());
    double rescaleConstant = transformer.getMin();
    Vector originalMin = transformer.originalMin();
    Vector originalMax = transformer.originalMax();
    List<Feature> features = encoder.getFeatures(transformer.getInputCol());
    SchemaUtil.checkSize(Math.max(originalMin.size(), originalMax.size()), features);
    List<Feature> result = new ArrayList<>(features.size());
    for (int i = 0, length = features.size(); i < length; i++) {
        Feature feature = features.get(i);
        ContinuousFeature continuousFeature = feature.toContinuousFeature();
        double min = originalMin.apply(i);
        double max = originalMax.apply(i);
        double range = max - min;
        Expression expression;
        if (range == 0d) {
            // Degenerate (constant) training column: Spark substitutes 0.5 for the ratio
            // rather than dividing by zero.
            expression = PMMLUtil.createConstant(0.5d);
        } else {
            expression = PMMLUtil.createApply(PMMLFunctions.DIVIDE, PMMLUtil.createApply(PMMLFunctions.SUBTRACT, continuousFeature.ref(), PMMLUtil.createConstant(min)), PMMLUtil.createConstant(range));
        }
        if (!ValueUtil.isOne(rescaleFactor)) {
            expression = PMMLUtil.createApply(PMMLFunctions.MULTIPLY, expression, PMMLUtil.createConstant(rescaleFactor));
        }
        if (!ValueUtil.isZero(rescaleConstant)) {
            expression = PMMLUtil.createApply(PMMLFunctions.ADD, expression, PMMLUtil.createConstant(rescaleConstant));
        }
        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i, length), OpType.CONTINUOUS, DataType.DOUBLE, expression);
        result.add(new ContinuousFeature(encoder, derivedField));
    }
    return result;
}
Also used : MinMaxScalerModel(org.apache.spark.ml.feature.MinMaxScalerModel) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Expression(org.dmg.pmml.Expression) ArrayList(java.util.ArrayList) Vector(org.apache.spark.ml.linalg.Vector) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) DerivedField(org.dmg.pmml.DerivedField)

Example 52 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.

From the class SQLTransformerConverter, method encodeLogicalPlan.

/**
 * Translates the expressions of a Catalyst logical plan into PMML fields.
 *
 * <p>Child plans are processed first, recursively; the fields they return are not collected
 * here, so the recursion matters only for whatever it registers on the encoder. Each
 * expression of this plan then yields either an already-known field (when it translates to a
 * bare field reference that {@code ensureField} can resolve) or a newly created derived field.
 *
 * @param encoder the encoder used for translation and derived-field creation
 * @param logicalPlan the plan whose expressions are encoded
 * @return the fields corresponding to this plan's own expressions, in expression order
 */
public static List<Field<?>> encodeLogicalPlan(SparkMLEncoder encoder, LogicalPlan logicalPlan) {
    List<LogicalPlan> childPlans = JavaConversions.seqAsJavaList(logicalPlan.children());
    for (LogicalPlan childPlan : childPlans) {
        // Return value intentionally ignored; see method documentation.
        encodeLogicalPlan(encoder, childPlan);
    }

    List<Field<?>> fields = new ArrayList<>();
    List<Expression> sqlExpressions = JavaConversions.seqAsJavaList(logicalPlan.expressions());
    for (Expression sqlExpression : sqlExpressions) {
        org.dmg.pmml.Expression translated = ExpressionTranslator.translate(encoder, sqlExpression);

        // A plain field reference maps straight onto an existing field, if one can be resolved.
        if (translated instanceof FieldRef) {
            Field<?> existingField = ensureField(encoder, ((FieldRef) translated).getField());
            if (existingField != null) {
                fields.add(existingField);
                continue;
            }
        }

        // Aliased expressions keep their alias; everything else gets a name built from the
        // formatted SQL expression.
        FieldName derivedName = (translated instanceof AliasExpression)
            ? FieldName.create(((AliasExpression) translated).getName())
            : FieldNameUtil.create("sql", ExpressionUtil.format(sqlExpression));

        DataType derivedDataType = DatasetUtil.translateDataType(sqlExpression.dataType());
        OpType derivedOpType = ExpressionUtil.getOpType(derivedDataType);

        org.dmg.pmml.Expression body = AliasExpression.unwrap(translated);

        // Walk the expression tree and call ensureField for every referenced field before the
        // derived field is created.
        Visitor referenceVisitor = new AbstractVisitor() {

            @Override
            public VisitorAction visit(FieldRef fieldRef) {
                ensureField(encoder, fieldRef.getField());
                return super.visit(fieldRef);
            }
        };
        referenceVisitor.applyTo(body);

        fields.add(encoder.createDerivedField(derivedName, derivedOpType, derivedDataType, body));
    }
    return fields;
}
Also used : AbstractVisitor(org.jpmml.model.visitors.AbstractVisitor) FieldRef(org.dmg.pmml.FieldRef) Visitor(org.dmg.pmml.Visitor) AbstractVisitor(org.jpmml.model.visitors.AbstractVisitor) ArrayList(java.util.ArrayList) Field(org.dmg.pmml.Field) DerivedField(org.dmg.pmml.DerivedField) AliasExpression(org.jpmml.sparkml.AliasExpression) Expression(org.apache.spark.sql.catalyst.expressions.Expression) AliasExpression(org.jpmml.sparkml.AliasExpression) DataType(org.dmg.pmml.DataType) LogicalPlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan) OpType(org.dmg.pmml.OpType) FieldName(org.dmg.pmml.FieldName) DerivedField(org.dmg.pmml.DerivedField)

Example 53 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.

From the class TokenizerConverter, method encodeFeatures.

/**
 * Encodes the Tokenizer as a lower-casing derived field wrapped in a document feature.
 *
 * <p>The single input feature is passed through the PMML {@code lowercase} function, and the
 * resulting string field is exposed as a {@link DocumentFeature} constructed with the
 * pattern {@code \s+}.
 *
 * @param encoder the encoder that supplies the input feature and registers the derived field
 * @return a single-element list holding the document feature
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Tokenizer transformer = getTransformer();
    Feature inputFeature = encoder.getOnlyFeature(transformer.getInputCol());

    Apply lowercased = PMMLUtil.createApply(PMMLFunctions.LOWERCASE, inputFeature.ref());
    DerivedField lowercaseField = encoder.createDerivedField(
        FieldNameUtil.create("lowercase", inputFeature),
        OpType.CATEGORICAL,
        DataType.STRING,
        lowercased);

    Feature documentFeature = new DocumentFeature(encoder, lowercaseField, "\\s+");
    return Collections.singletonList(documentFeature);
}
Also used : Apply(org.dmg.pmml.Apply) DocumentFeature(org.jpmml.sparkml.DocumentFeature) Tokenizer(org.apache.spark.ml.feature.Tokenizer) Feature(org.jpmml.converter.Feature) DocumentFeature(org.jpmml.sparkml.DocumentFeature) DerivedField(org.dmg.pmml.DerivedField)

Example 54 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.

From the class BucketizerConverter, method encodeFeatures.

/**
 * Encodes the Bucketizer as one PMML {@code Discretize} derived field per input column.
 *
 * <p>In single mode the transformer's one splits array is used; in multiple mode each input
 * column is paired with the splits array at the same index. For {@code n + 1} split points,
 * {@code n} bins are produced with integer categories {@code 0 .. n-1}. Every bin is
 * closed-open except the last, which is closed on both ends.
 *
 * @param encoder the encoder that supplies the input features and registers the derived fields
 * @return one {@link IndexFeature} per input column, in input order
 * @throws IllegalArgumentException if the input mode is neither single nor multiple
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Bucketizer transformer = getTransformer();
    InOutMode inputMode = getInputMode();

    boolean single = (InOutMode.SINGLE).equals(inputMode);
    boolean multiple = !single && (InOutMode.MULTIPLE).equals(inputMode);
    if (!single && !multiple) {
        throw new IllegalArgumentException();
    }

    String[] inputCols = inputMode.getInputCols(transformer);
    double[][] splitsArray = single
        ? new double[][] { transformer.getSplits() }
        : transformer.getSplitsArray();

    List<Feature> result = new ArrayList<>();
    for (int col = 0; col < inputCols.length; col++) {
        double[] splits = splitsArray[col];
        ContinuousFeature continuousFeature = encoder.getOnlyFeature(inputCols[col]).toContinuousFeature();

        Discretize discretize = new Discretize(continuousFeature.getName());
        discretize.setDataType(DataType.INTEGER);

        int binCount = splits.length - 1;
        List<Integer> categories = new ArrayList<>();
        for (int bin = 0; bin < binCount; bin++) {
            Integer category = bin;
            categories.add(category);

            // Only the final bin includes its right margin.
            Interval.Closure closure;
            if (bin < (binCount - 1)) {
                closure = Interval.Closure.CLOSED_OPEN;
            } else {
                closure = Interval.Closure.CLOSED_CLOSED;
            }

            Interval interval = new Interval(closure)
                .setLeftMargin(formatMargin(splits[bin]))
                .setRightMargin(formatMargin(splits[bin + 1]));
            discretize.addDiscretizeBins(new DiscretizeBin(category, interval));
        }

        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, col), OpType.CATEGORICAL, DataType.INTEGER, discretize);
        result.add(new IndexFeature(encoder, derivedField, categories));
    }
    return result;
}
Also used : IndexFeature(org.jpmml.converter.IndexFeature) ArrayList(java.util.ArrayList) IndexFeature(org.jpmml.converter.IndexFeature) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Discretize(org.dmg.pmml.Discretize) Bucketizer(org.apache.spark.ml.feature.Bucketizer) DiscretizeBin(org.dmg.pmml.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedField) Interval(org.dmg.pmml.Interval)

Aggregations

DerivedField (org.dmg.pmml.DerivedField): 54, ArrayList (java.util.ArrayList): 21, Feature (org.jpmml.converter.Feature): 18, ContinuousFeature (org.jpmml.converter.ContinuousFeature): 17, FieldName (org.dmg.pmml.FieldName): 15, Apply (org.dmg.pmml.Apply): 11, Expression (org.dmg.pmml.Expression): 10, DataField (org.dmg.pmml.DataField): 8, Test (org.junit.Test): 8, KiePMMLDerivedField (org.kie.pmml.commons.transformations.KiePMMLDerivedField): 8, List (java.util.List): 7, Constant (org.dmg.pmml.Constant): 7, FieldRef (org.dmg.pmml.FieldRef): 6, MapValues (org.dmg.pmml.MapValues): 6, NormContinuous (org.dmg.pmml.NormContinuous): 6, BlockStmt (com.github.javaparser.ast.stmt.BlockStmt): 5, PMML (org.dmg.pmml.PMML): 5, DataType (org.dmg.pmml.DataType): 4, Discretize (org.dmg.pmml.Discretize): 4, Statement (com.github.javaparser.ast.stmt.Statement): 3