Search in sources :

Example 16 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.

The method encodeFeatures of the class MinMaxScalerModelConverter.

/**
 * Encodes the fitted {@code MinMaxScalerModel} as one continuous {@code DerivedField}
 * per input feature.
 *
 * <p>Each derived field computes
 * {@code ((x - originalMin) / (originalMax - originalMin)) * (max - min) + min};
 * the multiplication and the addition are omitted when they would be no-ops
 * (factor of one, constant of zero).</p>
 *
 * <p>For a feature whose training range is empty ({@code originalMax == originalMin}),
 * the division would be a division by zero. Spark ML defines the raw scaled value
 * as {@code 0.5} in that case, so a constant {@code 0.5} is emitted instead of the
 * division and then rescaled like any other value.</p>
 *
 * @param encoder the encoder that supplies the input features and registers the derived fields
 * @return the scaled features, in the same order as the input features
 * @throws IllegalArgumentException if the size of {@code originalMax} or {@code originalMin}
 *         differs from the number of input features
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    MinMaxScalerModel transformer = getTransformer();
    double rescaleFactor = (transformer.getMax() - transformer.getMin());
    double rescaleConstant = transformer.getMin();
    List<Feature> features = encoder.getFeatures(transformer.getInputCol());
    Vector originalMax = transformer.originalMax();
    if (originalMax.size() != features.size()) {
        throw new IllegalArgumentException("Expected " + features.size() + " originalMax element(s), got " + originalMax.size());
    }
    Vector originalMin = transformer.originalMin();
    if (originalMin.size() != features.size()) {
        throw new IllegalArgumentException("Expected " + features.size() + " originalMin element(s), got " + originalMin.size());
    }
    List<Feature> result = new ArrayList<>(features.size());
    for (int i = 0; i < features.size(); i++) {
        Feature feature = features.get(i);
        ContinuousFeature continuousFeature = feature.toContinuousFeature();
        double max = originalMax.apply(i);
        double min = originalMin.apply(i);
        double range = max - min;
        Expression expression;
        if (range != 0d) {
            expression = PMMLUtil.createApply("/", PMMLUtil.createApply("-", continuousFeature.ref(), PMMLUtil.createConstant(min)), PMMLUtil.createConstant(range));
        } else {
            // Constant column: avoid emitting a division by zero; Spark uses 0.5 as the raw scaled value here.
            expression = PMMLUtil.createConstant(0.5d);
        }
        if (!ValueUtil.isOne(rescaleFactor)) {
            expression = PMMLUtil.createApply("*", expression, PMMLUtil.createConstant(rescaleFactor));
        }
        if (!ValueUtil.isZero(rescaleConstant)) {
            expression = PMMLUtil.createApply("+", expression, PMMLUtil.createConstant(rescaleConstant));
        }
        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i), OpType.CONTINUOUS, DataType.DOUBLE, expression);
        result.add(new ContinuousFeature(encoder, derivedField));
    }
    return result;
}
Also used : MinMaxScalerModel(org.apache.spark.ml.feature.MinMaxScalerModel) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Expression(org.dmg.pmml.Expression) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Vector(org.apache.spark.ml.linalg.Vector) DerivedField(org.dmg.pmml.DerivedField)

Example 17 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.

The method encodeFeatures of the class PCAModelConverter.

/**
 * Encodes the fitted {@code PCAModel} as {@code k} continuous derived fields.
 *
 * <p>Derived field {@code i} is a PMML {@code sum} over all input features, where
 * feature {@code j} is multiplied by the matrix entry {@code pc(j, i)}; the
 * multiplication is left out when that entry equals one.</p>
 *
 * @param encoder the encoder that supplies the input features and registers the derived fields
 * @return one continuous feature per principal component
 * @throws IllegalArgumentException if the number of matrix rows differs from the number of input features
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    PCAModel transformer = getTransformer();
    List<Feature> inputs = encoder.getFeatures(transformer.getInputCol());
    DenseMatrix pc = transformer.pc();
    int inputCount = inputs.size();
    if (pc.numRows() != inputCount) {
        throw new IllegalArgumentException();
    }
    int componentCount = transformer.getK();
    List<Feature> components = new ArrayList<>();
    for (int column = 0; column < componentCount; column++) {
        Apply sum = new Apply("sum");
        for (int row = 0; row < inputCount; row++) {
            ContinuousFeature input = inputs.get(row).toContinuousFeature();
            Expression ref = input.ref();
            Double weight = pc.apply(row, column);
            // A weight of one needs no multiplication; the bare field reference is summed directly.
            Expression term = ValueUtil.isOne(weight)
                ? ref
                : PMMLUtil.createApply("*", ref, PMMLUtil.createConstant(weight));
            sum.addExpressions(term);
        }
        DerivedField component = encoder.createDerivedField(formatName(transformer, column), OpType.CONTINUOUS, DataType.DOUBLE, sum);
        components.add(new ContinuousFeature(encoder, component));
    }
    return components;
}
Also used : PCAModel(org.apache.spark.ml.feature.PCAModel) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Expression(org.dmg.pmml.Expression) Apply(org.dmg.pmml.Apply) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) DerivedField(org.dmg.pmml.DerivedField) DenseMatrix(org.apache.spark.ml.linalg.DenseMatrix)

Example 18 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.

The method encodeFeatures of the class BucketizerConverter.

/**
 * Encodes the {@code Bucketizer} as a single categorical, integer-typed derived field
 * backed by a PMML {@code Discretize} expression.
 *
 * <p>Each pair of adjacent split points becomes one bin whose label is the bin index.
 * Every bin is closed on the left and open on the right, except the last one, which is
 * closed on both sides.</p>
 *
 * @param encoder the encoder that supplies the input feature and registers the derived field
 * @return a singleton list holding the categorical feature whose categories are the bin indices
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Bucketizer transformer = getTransformer();
    ContinuousFeature input = encoder.getOnlyFeature(transformer.getInputCol()).toContinuousFeature();
    Discretize discretize = new Discretize(input.getName());
    List<String> binLabels = new ArrayList<>();
    double[] splits = transformer.getSplits();
    int lastBin = splits.length - 2;
    for (int bin = 0; bin <= lastBin; bin++) {
        String label = String.valueOf(bin);
        binLabels.add(label);
        Interval.Closure closure;
        if (bin == lastBin) {
            // Only the final bin includes its upper bound.
            closure = Interval.Closure.CLOSED_CLOSED;
        } else {
            closure = Interval.Closure.CLOSED_OPEN;
        }
        Interval interval = new Interval(closure);
        interval.setLeftMargin(formatMargin(splits[bin]));
        interval.setRightMargin(formatMargin(splits[bin + 1]));
        discretize.addDiscretizeBins(new DiscretizeBin(label, interval));
    }
    DerivedField derivedField = encoder.createDerivedField(formatName(transformer), OpType.CATEGORICAL, DataType.INTEGER, discretize);
    CategoricalFeature bucketFeature = new CategoricalFeature(encoder, derivedField, binLabels);
    return Collections.<Feature>singletonList(bucketFeature);
}
Also used : ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Discretize(org.dmg.pmml.Discretize) Bucketizer(org.apache.spark.ml.feature.Bucketizer) DiscretizeBin(org.dmg.pmml.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedField) Interval(org.dmg.pmml.Interval)

Example 19 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.

The method encodeFeatures of the class StringIndexerModelConverter.

/**
 * Encodes the fitted {@code StringIndexerModel} as a single categorical feature whose
 * categories are the model's labels.
 *
 * <p>When the input resolves to a {@code DataField}, an invalid-value decorator is
 * registered for it: {@code "keep"} maps to {@code AS_IS} and {@code "error"} maps to
 * {@code RETURN_INVALID}; any other mode is rejected. A {@code DerivedField} input gets
 * no decorator.</p>
 *
 * <p>In {@code "keep"} mode an additional derived field is created that passes the value
 * through when it is one of the known labels and otherwise substitutes
 * {@code LABEL_UNKNOWN}, which is also appended to the category list.</p>
 *
 * @param encoder the encoder that supplies the input feature and registers fields and decorators
 * @return a singleton list holding the categorical feature
 * @throws IllegalArgumentException if the field is neither a {@code DataField} nor a
 *         {@code DerivedField}, or if the mode is unsupported for a {@code DataField}
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    StringIndexerModel transformer = getTransformer();
    Feature feature = encoder.getOnlyFeature(transformer.getInputCol());
    List<String> categories = new ArrayList<>(Arrays.asList(transformer.labels()));
    String handleInvalid = transformer.getHandleInvalid();
    Field<?> field = encoder.toCategorical(feature.getName(), categories);
    boolean isDataField = (field instanceof DataField);
    if (!isDataField && !(field instanceof DerivedField)) {
        throw new IllegalArgumentException();
    }
    if (isDataField) {
        DataField dataField = (DataField) field;
        InvalidValueTreatmentMethod treatment;
        if (handleInvalid.equals("keep")) {
            treatment = InvalidValueTreatmentMethod.AS_IS;
        } else if (handleInvalid.equals("error")) {
            treatment = InvalidValueTreatmentMethod.RETURN_INVALID;
        } else {
            throw new IllegalArgumentException(handleInvalid);
        }
        InvalidValueDecorator decorator = new InvalidValueDecorator();
        decorator.setInvalidValueTreatment(treatment);
        encoder.addDecorator(dataField.getName(), decorator);
    }
    // Derived fields are left undecorated.
    if (handleInvalid.equals("keep")) {
        Apply isKnownLabel = PMMLUtil.createApply("isIn", feature.ref());
        for (String label : categories) {
            isKnownLabel.addExpressions(PMMLUtil.createConstant(label, feature.getDataType()));
        }
        // The unknown label is appended only after the membership test has been built from the known labels.
        categories.add(StringIndexerModelConverter.LABEL_UNKNOWN);
        Apply keepOrUnknown = PMMLUtil.createApply("if", isKnownLabel, feature.ref(), PMMLUtil.createConstant(StringIndexerModelConverter.LABEL_UNKNOWN, DataType.STRING));
        field = encoder.createDerivedField(FeatureUtil.createName("handleInvalid", feature), OpType.CATEGORICAL, feature.getDataType(), keepOrUnknown);
    }
    CategoricalFeature indexedFeature = new CategoricalFeature(encoder, field, categories);
    return Collections.<Feature>singletonList(indexedFeature);
}
Also used : Apply(org.dmg.pmml.Apply) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) StringIndexerModel(org.apache.spark.ml.feature.StringIndexerModel) CategoricalFeature(org.jpmml.converter.CategoricalFeature) InvalidValueTreatmentMethod(org.dmg.pmml.InvalidValueTreatmentMethod) InvalidValueDecorator(org.jpmml.converter.InvalidValueDecorator) DataField(org.dmg.pmml.DataField) DerivedField(org.dmg.pmml.DerivedField)

Example 20 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.

The method encodeFeatures of the class TokenizerConverter.

/**
 * Encodes the {@code Tokenizer} as a document feature over a lowercased copy of the input.
 *
 * <p>A categorical, string-typed derived field applying the PMML {@code lowercase}
 * function to the input is created, and wrapped in a {@code DocumentFeature} together
 * with the pattern {@code \s+} (presumably the token separator; confirm against
 * {@code DocumentFeature}).</p>
 *
 * @param encoder the encoder that supplies the input feature and registers the derived field
 * @return a singleton list holding the document feature
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Tokenizer transformer = getTransformer();
    Feature input = encoder.getOnlyFeature(transformer.getInputCol());
    Apply toLowerCase = PMMLUtil.createApply("lowercase", input.ref());
    DerivedField lowercased = encoder.createDerivedField(FeatureUtil.createName("lowercase", input), OpType.CATEGORICAL, DataType.STRING, toLowerCase);
    String separatorPattern = "\\s+";
    DocumentFeature document = new DocumentFeature(encoder, lowercased, separatorPattern);
    return Collections.<Feature>singletonList(document);
}
Also used : Apply(org.dmg.pmml.Apply) DocumentFeature(org.jpmml.sparkml.DocumentFeature) Tokenizer(org.apache.spark.ml.feature.Tokenizer) Feature(org.jpmml.converter.Feature) DocumentFeature(org.jpmml.sparkml.DocumentFeature) DerivedField(org.dmg.pmml.DerivedField)

Aggregations

DerivedField (org.dmg.pmml.DerivedField): 27, ArrayList (java.util.ArrayList): 16, Feature (org.jpmml.converter.Feature): 14, ContinuousFeature (org.jpmml.converter.ContinuousFeature): 13, FieldName (org.dmg.pmml.FieldName): 10, Apply (org.dmg.pmml.Apply): 9, Expression (org.dmg.pmml.Expression): 8, NormContinuous (org.dmg.pmml.NormContinuous): 6, CategoricalFeature (org.jpmml.converter.CategoricalFeature): 5, DataField (org.dmg.pmml.DataField): 4, Discretize (org.dmg.pmml.Discretize): 4, MapValues (org.dmg.pmml.MapValues): 4, Vector (org.apache.spark.ml.linalg.Vector): 3, FieldRef (org.dmg.pmml.FieldRef): 3, LocalTransformations (org.dmg.pmml.LocalTransformations): 3, HashMap (java.util.HashMap): 2, List (java.util.List): 2, Map (java.util.Map): 2, Constant (org.dmg.pmml.Constant): 2, DiscretizeBin (org.dmg.pmml.DiscretizeBin): 2