Search in sources :

Example 11 with ContinuousFeature

use of org.jpmml.converter.ContinuousFeature in project jpmml-sparkml by jpmml.

the class TermFeature method toContinuousFeature.

/**
 * Returns a continuous view of this feature, backed by a derived field
 * registered under this feature's name.
 *
 * <p>If the encoder already holds a derived field with that name it is reused;
 * otherwise a new continuous derived field is created from {@code createApply()}.
 *
 * @return a {@link ContinuousFeature} wrapping the existing or newly created derived field
 */
@Override
public ContinuousFeature toContinuousFeature() {
    PMMLEncoder pmmlEncoder = ensureEncoder();
    DerivedField field = pmmlEncoder.getDerivedField(getName());
    if (field != null) {
        // Already registered on the encoder: reuse it.
        return new ContinuousFeature(pmmlEncoder, field);
    }
    Apply expression = createApply();
    field = pmmlEncoder.createDerivedField(getName(), OpType.CONTINUOUS, getDataType(), expression);
    return new ContinuousFeature(pmmlEncoder, field);
}
Also used : ContinuousFeature(org.jpmml.converter.ContinuousFeature) Apply(org.dmg.pmml.Apply) PMMLEncoder(org.jpmml.converter.PMMLEncoder) DerivedField(org.dmg.pmml.DerivedField)

Example 12 with ContinuousFeature

use of org.jpmml.converter.ContinuousFeature in project jpmml-sparkml by jpmml.

the class StandardScalerModelConverter method encodeFeatures.

/**
 * Encodes the fitted {@code StandardScalerModel} as one continuous derived field
 * per input feature.
 *
 * <p>For input feature {@code i} the derived expression is
 * {@code (x - mean[i]) * factor[i]}, where centering is applied only when
 * {@code withMean} is set and scaling only when {@code withStd} is set.
 * Centering by 0 and scaling by 1 are omitted from the expression.
 *
 * <p>A standard deviation of exactly 0 is encoded as a scale factor of 0 rather
 * than {@code 1 / 0}: Spark's StandardScaler documents that such features are
 * output as 0.0, whereas a plain reciprocal would emit an infinite constant.
 *
 * @param encoder the encoder that supplies the input features and receives the derived fields
 * @return the scaled features, in the same order as the input features
 * @throws IllegalArgumentException if the mean or std vector that is in use does not
 *         have exactly one element per input feature
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    StandardScalerModel transformer = getTransformer();
    List<Feature> features = encoder.getFeatures(transformer.getInputCol());
    // Read the flags once; they are constant for the duration of this call.
    boolean withMean = transformer.getWithMean();
    boolean withStd = transformer.getWithStd();
    Vector mean = transformer.mean();
    if (withMean && mean.size() != features.size()) {
        throw new IllegalArgumentException("Expected " + features.size() + " mean values, got " + mean.size() + " mean values");
    }
    Vector std = transformer.std();
    if (withStd && std.size() != features.size()) {
        throw new IllegalArgumentException("Expected " + features.size() + " std values, got " + std.size() + " std values");
    }
    List<Feature> result = new ArrayList<>();
    for (int i = 0; i < features.size(); i++) {
        Feature feature = features.get(i);
        ContinuousFeature continuousFeature = feature.toContinuousFeature();
        Expression expression = continuousFeature.ref();
        if (withMean) {
            double meanValue = mean.apply(i);
            // Subtracting zero is a no-op, so keep the expression minimal.
            if (!ValueUtil.isZero(meanValue)) {
                expression = PMMLUtil.createApply("-", expression, PMMLUtil.createConstant(meanValue));
            }
        }
        if (withStd) {
            double stdValue = std.apply(i);
            // Multiplying by one is a no-op, so keep the expression minimal.
            if (!ValueUtil.isOne(stdValue)) {
                // Zero-variance feature: scale by 0 instead of dividing by zero.
                double factor = (stdValue != 0d) ? (1d / stdValue) : 0d;
                expression = PMMLUtil.createApply("*", expression, PMMLUtil.createConstant(factor));
            }
        }
        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i), OpType.CONTINUOUS, DataType.DOUBLE, expression);
        result.add(new ContinuousFeature(encoder, derivedField));
    }
    return result;
}
Also used : StandardScalerModel(org.apache.spark.ml.feature.StandardScalerModel) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Expression(org.dmg.pmml.Expression) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Vector(org.apache.spark.ml.linalg.Vector) DerivedField(org.dmg.pmml.DerivedField)

Example 13 with ContinuousFeature

use of org.jpmml.converter.ContinuousFeature in project jpmml-sparkml by jpmml.

the class VectorIndexerModelConverter method encodeFeatures.

/**
 * Encodes the fitted {@code VectorIndexerModel}.
 *
 * <p>For every input feature that has an entry in the model's category maps, a
 * categorical integer derived field is created that maps each original value to
 * its category index through an inline lookup table ({@code MapValues}).
 * Features without a category map are passed through as continuous features.
 *
 * @param encoder the encoder that supplies the input features and receives the derived fields
 * @return one feature per input feature, in input order
 * @throws IllegalArgumentException if the number of input features differs from
 *         the model's {@code numFeatures()}
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    VectorIndexerModel transformer = getTransformer();
    List<Feature> features = encoder.getFeatures(transformer.getInputCol());
    int numFeatures = transformer.numFeatures();
    if (numFeatures != features.size()) {
        throw new IllegalArgumentException("Expected " + numFeatures + " features, got " + features.size() + " features");
    }
    Map<Integer, Map<Double, Integer>> categoryMaps = transformer.javaCategoryMaps();
    List<Feature> result = new ArrayList<>();
    for (int i = 0; i < numFeatures; i++) {
        Feature feature = features.get(i);
        Map<Double, Integer> categoryMap = categoryMaps.get(i);
        if (categoryMap != null) {
            // Formatted input values (keys) and formatted category indices (values).
            List<String> categories = new ArrayList<>();
            List<String> values = new ArrayList<>();
            DocumentBuilder documentBuilder = DOMUtil.createDocumentBuilder();
            InlineTable inlineTable = new InlineTable();
            List<String> columns = Arrays.asList("input", "output");
            // Sort a copy of the entries so the lookup rows are emitted in the order defined by COMPARATOR.
            List<Map.Entry<Double, Integer>> entries = new ArrayList<>(categoryMap.entrySet());
            Collections.sort(entries, VectorIndexerModelConverter.COMPARATOR);
            for (Map.Entry<Double, Integer> entry : entries) {
                String category = ValueUtil.formatValue(entry.getKey());
                categories.add(category);
                String value = ValueUtil.formatValue(entry.getValue());
                values.add(value);
                Row row = DOMUtil.createRow(documentBuilder, columns, Arrays.asList(category, value));
                inlineTable.addRows(row);
            }
            encoder.toCategorical(feature.getName(), categories);
            MapValues mapValues = new MapValues().addFieldColumnPairs(new FieldColumnPair(feature.getName(), columns.get(0))).setOutputColumn(columns.get(1)).setInlineTable(inlineTable);
            DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i), OpType.CATEGORICAL, DataType.INTEGER, mapValues);
            result.add(new CategoricalFeature(encoder, derivedField, values));
        } else {
            // Convert rather than downcast: a hard cast to ContinuousFeature throws
            // ClassCastException for any other Feature subtype.
            // NOTE(review): assumes toContinuousFeature() on a feature that is already
            // continuous yields an equivalent feature - confirm against jpmml-converter.
            result.add(feature.toContinuousFeature());
        }
    }
    return result;
}
Also used : InlineTable(org.dmg.pmml.InlineTable) ArrayList(java.util.ArrayList) FieldColumnPair(org.dmg.pmml.FieldColumnPair) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) DocumentBuilder(javax.xml.parsers.DocumentBuilder) MapValues(org.dmg.pmml.MapValues) VectorIndexerModel(org.apache.spark.ml.feature.VectorIndexerModel) Row(org.dmg.pmml.Row) Map(java.util.Map) DerivedField(org.dmg.pmml.DerivedField)

Example 14 with ContinuousFeature

use of org.jpmml.converter.ContinuousFeature in project jpmml-sparkml by jpmml.

the class BinarizerConverter method encodeFeatures.

/**
 * Encodes the {@code Binarizer} as a single categorical derived field.
 *
 * <p>The derived expression is {@code if(x <= threshold, 0, 1)}, and the
 * resulting feature is declared with the categories {@code "0"} and {@code "1"}.
 *
 * @param encoder the encoder that supplies the input feature and receives the derived field
 * @return a singleton list holding the binarized feature
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Binarizer transformer = getTransformer();
    Feature inputFeature = encoder.getOnlyFeature(transformer.getInputCol());
    ContinuousFeature input = inputFeature.toContinuousFeature();

    // if (input <= threshold) then 0 else 1
    Apply binarize = new Apply("if");
    binarize.addExpressions(PMMLUtil.createApply("lessOrEqual", input.ref(), PMMLUtil.createConstant(transformer.getThreshold())));
    binarize.addExpressions(PMMLUtil.createConstant(0d), PMMLUtil.createConstant(1d));

    DerivedField binarizedField = encoder.createDerivedField(formatName(transformer), OpType.CATEGORICAL, DataType.DOUBLE, binarize);
    CategoricalFeature binarized = new CategoricalFeature(encoder, binarizedField, Arrays.asList("0", "1"));
    return Collections.<Feature>singletonList(binarized);
}
Also used : ContinuousFeature(org.jpmml.converter.ContinuousFeature) Apply(org.dmg.pmml.Apply) Binarizer(org.apache.spark.ml.feature.Binarizer) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) DerivedField(org.dmg.pmml.DerivedField) CategoricalFeature(org.jpmml.converter.CategoricalFeature)

Example 15 with ContinuousFeature

use of org.jpmml.converter.ContinuousFeature in project jpmml-sparkml by jpmml.

the class ClusteringModelConverter method registerOutputFields.

/**
 * Creates the predicted output field for the model's prediction column and
 * registers a matching feature on the encoder.
 *
 * <p>The output field is a categorical string field named after the prediction
 * column. The registered feature refuses conversion to a continuous feature.
 *
 * @param label the model label (not used by this implementation)
 * @param encoder the encoder on which the prediction feature is registered
 * @return a singleton list holding the predicted output field
 */
@Override
public List<OutputField> registerOutputFields(Label label, SparkMLEncoder encoder) {
    String predictionColumn = getTransformer().getPredictionCol();
    FieldName predictionName = FieldName.create(predictionColumn);
    OutputField clusterField = ModelUtil.createPredictedField(predictionName, DataType.STRING, OpType.CATEGORICAL);

    // Anonymous Feature whose continuous conversion is deliberately unsupported.
    Feature clusterFeature = new Feature(encoder, clusterField.getName(), clusterField.getDataType()) {

        @Override
        public ContinuousFeature toContinuousFeature() {
            throw new UnsupportedOperationException();
        }
    };

    encoder.putOnlyFeature(predictionColumn, clusterFeature);
    return Collections.singletonList(clusterField);
}
Also used : OutputField(org.dmg.pmml.OutputField) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature)

Aggregations

ContinuousFeature (org.jpmml.converter.ContinuousFeature): 26; Feature (org.jpmml.converter.Feature): 23; ArrayList (java.util.ArrayList): 13; DerivedField (org.dmg.pmml.DerivedField): 13; CategoricalFeature (org.jpmml.converter.CategoricalFeature): 12; Apply (org.dmg.pmml.Apply): 7; FieldName (org.dmg.pmml.FieldName): 7; DataField (org.dmg.pmml.DataField): 6; Expression (org.dmg.pmml.Expression): 6; Predicate (org.dmg.pmml.Predicate): 6; SimplePredicate (org.dmg.pmml.SimplePredicate): 6; Node (org.dmg.pmml.tree.Node): 6; OutputField (org.dmg.pmml.OutputField): 4; BooleanFeature (org.jpmml.converter.BooleanFeature): 4; Vector (org.apache.spark.ml.linalg.Vector): 3; CategoricalLabel (org.jpmml.converter.CategoricalLabel): 3; List (java.util.List): 2; DocumentBuilder (javax.xml.parsers.DocumentBuilder): 2; DataType (org.dmg.pmml.DataType): 2; FieldColumnPair (org.dmg.pmml.FieldColumnPair): 2