Search in sources :

Example 41 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class MinMaxScalerModelConverter method encodeFeatures.

/**
 * Encodes the min-max rescaling of every input feature as a PMML derived field.
 *
 * <p>For each input feature the emitted expression is
 * {@code ((x - originalMin) / (originalMax - originalMin)) * (max - min) + min}; the
 * multiplication and the addition are left out when they would be no-ops.
 * When {@code originalMax == originalMin} for a feature, the ratio is replaced by {@code 0.5}
 * (folded into a single constant) instead of emitting a division by zero, so that the
 * rescaled value becomes {@code 0.5 * (max + min)} as Spark ML documents for this case.
 *
 * @param encoder the encoder that supplies the input features and registers derived fields
 * @return one continuous feature per input feature, in input order
 * @throws IllegalArgumentException if the fitted min or max vector does not have one element per input feature
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    MinMaxScalerModel transformer = getTransformer();
    double rescaleFactor = (transformer.getMax() - transformer.getMin());
    double rescaleConstant = transformer.getMin();
    List<Feature> features = encoder.getFeatures(transformer.getInputCol());
    Vector originalMax = transformer.originalMax();
    if (originalMax.size() != features.size()) {
        throw new IllegalArgumentException("Expected " + features.size() + " originalMax element(s), got " + originalMax.size());
    }
    Vector originalMin = transformer.originalMin();
    if (originalMin.size() != features.size()) {
        throw new IllegalArgumentException("Expected " + features.size() + " originalMin element(s), got " + originalMin.size());
    }
    List<Feature> result = new ArrayList<>(features.size());
    for (int i = 0; i < features.size(); i++) {
        Feature feature = features.get(i);
        ContinuousFeature continuousFeature = feature.toContinuousFeature();
        double max = originalMax.apply(i);
        double min = originalMin.apply(i);
        double range = max - min;
        Expression expression;
        if (range != 0d) {
            expression = PMMLUtil.createApply("/", PMMLUtil.createApply("-", continuousFeature.ref(), PMMLUtil.createConstant(min)), PMMLUtil.createConstant(range));
            if (!ValueUtil.isOne(rescaleFactor)) {
                expression = PMMLUtil.createApply("*", expression, PMMLUtil.createConstant(rescaleFactor));
            }
            if (!ValueUtil.isZero(rescaleConstant)) {
                expression = PMMLUtil.createApply("+", expression, PMMLUtil.createConstant(rescaleConstant));
            }
        } else {
            // Degenerate feature: dividing by (max - min) == 0 is undefined, so use the
            // fixed ratio 0.5 and fold the rescaling into one constant.
            expression = PMMLUtil.createConstant((0.5d * rescaleFactor) + rescaleConstant);
        }
        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i), OpType.CONTINUOUS, DataType.DOUBLE, expression);
        result.add(new ContinuousFeature(encoder, derivedField));
    }
    return result;
}
Also used : MinMaxScalerModel(org.apache.spark.ml.feature.MinMaxScalerModel) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Expression(org.dmg.pmml.Expression) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Vector(org.apache.spark.ml.linalg.Vector) DerivedField(org.dmg.pmml.DerivedField)

Example 42 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class NGramConverter method encodeFeatures.

/**
 * Returns the single feature of the transformer's input column as-is.
 *
 * <p>No derived field is created here; the input feature is cast to
 * {@link DocumentFeature} and handed back as a one-element list.
 *
 * @param encoder the encoder that supplies the input feature
 * @return a singleton list holding the input document feature
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    NGram transformer = getTransformer();
    String inputCol = transformer.getInputCol();
    Feature inputFeature = encoder.getOnlyFeature(inputCol);
    // The cast fails with a ClassCastException when the input is not a document feature.
    DocumentFeature documentFeature = (DocumentFeature) inputFeature;
    return Collections.<Feature>singletonList(documentFeature);
}
Also used : NGram(org.apache.spark.ml.feature.NGram) DocumentFeature(org.jpmml.sparkml.DocumentFeature) Feature(org.jpmml.converter.Feature) DocumentFeature(org.jpmml.sparkml.DocumentFeature)

Example 43 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class OneHotEncoderConverter method encodeFeatures.

/**
 * Expands the categorical input feature into one binary feature per category value.
 *
 * <p>When {@code dropLast} is in effect the final category value is left out.
 * If the {@code dropLast} param has not been set on the transformer, this method
 * treats it as {@code true}.
 *
 * @param encoder the encoder that supplies the input feature
 * @return the binary features, in the order of the category values
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    OneHotEncoder transformer = getTransformer();
    // An explicitly set param value wins; otherwise fall back to true.
    Option<Object> dropLastParam = transformer.get(transformer.dropLast());
    boolean dropLast = dropLastParam.isDefined() ? (Boolean) dropLastParam.get() : true;
    Feature inputFeature = encoder.getOnlyFeature(transformer.getInputCol());
    CategoricalFeature categoricalFeature = (CategoricalFeature) inputFeature;
    List<String> allCategories = categoricalFeature.getValues();
    List<String> encodedCategories = dropLast ? allCategories.subList(0, allCategories.size() - 1) : allCategories;
    List<Feature> binaryFeatures = new ArrayList<>();
    for (String category : encodedCategories) {
        BinaryFeature binaryFeature = new BinaryFeature(encoder, categoricalFeature.getName(), DataType.STRING, category);
        binaryFeatures.add(binaryFeature);
    }
    return binaryFeatures;
}
Also used : OneHotEncoder(org.apache.spark.ml.feature.OneHotEncoder) ArrayList(java.util.ArrayList) BinaryFeature(org.jpmml.converter.BinaryFeature) Feature(org.jpmml.converter.Feature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) BinaryFeature(org.jpmml.converter.BinaryFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature)

Example 44 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class PCAModelConverter method encodeFeatures.

/**
 * Encodes each principal component as a PMML derived field.
 *
 * <p>Component {@code i} is emitted as a {@code sum} over all input features {@code j} of
 * {@code feature_j * pc(j, i)}; the multiplication is left out when the coefficient is one.
 *
 * @param encoder the encoder that supplies the input features and registers derived fields
 * @return one continuous feature per principal component ({@code getK()} in total)
 * @throws IllegalArgumentException if the number of rows of the principal components matrix differs from the number of input features
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    PCAModel transformer = getTransformer();
    List<Feature> features = encoder.getFeatures(transformer.getInputCol());
    DenseMatrix pc = transformer.pc();
    if (pc.numRows() != features.size()) {
        throw new IllegalArgumentException("Expected " + features.size() + " principal components matrix row(s), got " + pc.numRows());
    }
    int k = transformer.getK();
    List<Feature> result = new ArrayList<>(k);
    for (int i = 0; i < k; i++) {
        Apply apply = new Apply("sum");
        for (int j = 0; j < features.size(); j++) {
            Feature feature = features.get(j);
            ContinuousFeature continuousFeature = feature.toContinuousFeature();
            Expression expression = continuousFeature.ref();
            // Row j addresses the input feature, column i the principal component.
            double coefficient = pc.apply(j, i);
            if (!ValueUtil.isOne(coefficient)) {
                expression = PMMLUtil.createApply("*", expression, PMMLUtil.createConstant(coefficient));
            }
            apply.addExpressions(expression);
        }
        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i), OpType.CONTINUOUS, DataType.DOUBLE, apply);
        result.add(new ContinuousFeature(encoder, derivedField));
    }
    return result;
}
Also used : PCAModel(org.apache.spark.ml.feature.PCAModel) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Expression(org.dmg.pmml.Expression) Apply(org.dmg.pmml.Apply) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) DerivedField(org.dmg.pmml.DerivedField) DenseMatrix(org.apache.spark.ml.linalg.DenseMatrix)

Example 45 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class BucketizerConverter method encodeFeatures.

/**
 * Encodes the bucketing of the continuous input feature as a PMML {@code Discretize} field.
 *
 * <p>Each pair of adjacent split points becomes one bin whose label is the bin index.
 * Every bin is closed on the left and open on the right, except the last one, which is
 * closed on both sides.
 *
 * @param encoder the encoder that supplies the input feature and registers the derived field
 * @return a singleton list holding the categorical feature over the bin labels
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Bucketizer transformer = getTransformer();
    ContinuousFeature continuousFeature = encoder.getOnlyFeature(transformer.getInputCol()).toContinuousFeature();
    Discretize discretize = new Discretize(continuousFeature.getName());
    List<String> categories = new ArrayList<>();
    double[] splits = transformer.getSplits();
    int lastBucket = splits.length - 2;
    for (int bucket = 0; bucket <= lastBucket; bucket++) {
        String label = String.valueOf(bucket);
        categories.add(label);
        // Only the final bin includes its right margin.
        Interval.Closure closure;
        if (bucket == lastBucket) {
            closure = Interval.Closure.CLOSED_CLOSED;
        } else {
            closure = Interval.Closure.CLOSED_OPEN;
        }
        Interval interval = new Interval(closure).setLeftMargin(formatMargin(splits[bucket])).setRightMargin(formatMargin(splits[bucket + 1]));
        discretize.addDiscretizeBins(new DiscretizeBin(label, interval));
    }
    DerivedField derivedField = encoder.createDerivedField(formatName(transformer), OpType.CATEGORICAL, DataType.INTEGER, discretize);
    CategoricalFeature bucketFeature = new CategoricalFeature(encoder, derivedField, categories);
    return Collections.<Feature>singletonList(bucketFeature);
}
Also used : ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Discretize(org.dmg.pmml.Discretize) Bucketizer(org.apache.spark.ml.feature.Bucketizer) DiscretizeBin(org.dmg.pmml.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedField) Interval(org.dmg.pmml.Interval)

Aggregations

Feature (org.jpmml.converter.Feature): 53, ContinuousFeature (org.jpmml.converter.ContinuousFeature): 30, ArrayList (java.util.ArrayList): 27, CategoricalFeature (org.jpmml.converter.CategoricalFeature): 19, DerivedField (org.dmg.pmml.DerivedField): 14, DataField (org.dmg.pmml.DataField): 13, FieldName (org.dmg.pmml.FieldName): 10, Apply (org.dmg.pmml.Apply): 9, BooleanFeature (org.jpmml.converter.BooleanFeature): 9, BinaryFeature (org.jpmml.converter.BinaryFeature): 7, List (java.util.List): 6, Expression (org.dmg.pmml.Expression): 6, SimplePredicate (org.dmg.pmml.SimplePredicate): 6, Vector (org.apache.spark.ml.linalg.Vector): 5, Predicate (org.dmg.pmml.Predicate): 5, Node (org.dmg.pmml.tree.Node): 5, DocumentFeature (org.jpmml.sparkml.DocumentFeature): 5, InteractionFeature (org.jpmml.converter.InteractionFeature): 4, DocumentBuilder (javax.xml.parsers.DocumentBuilder): 3, Transformer (org.apache.spark.ml.Transformer): 3