Search in sources:

Example 36 with Feature

use of org.jpmml.converter.Feature in project jpmml-r by jpmml.

the class RandomForestConverter method encodeFormula.

/**
 * Creates a {@link Formula} from the {@code terms} element of the random forest object,
 * registers the dependent variable as the encoder's label, and adds one feature per
 * named entry of the forest's {@code xlevels} element.
 *
 * @param encoder the encoder that receives the label and the features.
 * @throws IllegalArgumentException if the {@code response} attribute of {@code terms} is zero.
 */
private void encodeFormula(RExpEncoder encoder) {
    RGenericVector randomForest = getObject();
    RGenericVector forest = (RGenericVector) randomForest.getValue("forest");
    // NOTE(review): the second argument presumably marks "y" as optional - confirm against RGenericVector.
    RNumberVector<?> y = (RNumberVector<?>) randomForest.getValue("y", true);
    RExp terms = randomForest.getValue("terms");
    final RNumberVector<?> ncat = (RNumberVector<?>) forest.getValue("ncat");
    final RGenericVector xlevels = (RGenericVector) forest.getValue("xlevels");
    RIntegerVector response = (RIntegerVector) terms.getAttributeValue("response");
    FormulaContext context = new FormulaContext() {

        /**
         * Returns the levels recorded in {@code xlevels} for a variable whose {@code ncat}
         * entry is greater than one, and {@code null} for every other variable.
         */
        @Override
        public List<String> getCategories(String variable) {
            if (ncat != null && ncat.hasValue(variable)) {
                if ((ncat.getValue(variable)).doubleValue() > 1d) {
                    RStringVector levels = (RStringVector) xlevels.getValue(variable);
                    return levels.getValues();
                }
            }
            return null;
        }

        /**
         * No data is supplied to the formula through this context.
         */
        @Override
        public RGenericVector getData() {
            return null;
        }
    };
    Formula formula = FormulaUtil.createFormula(terms, context, encoder);
    // Dependent variable
    int responseIndex = response.asScalar();
    if (responseIndex == 0) {
        throw new IllegalArgumentException("Attribute 'response' of the terms object is 0; a dependent variable is required");
    }
    // The response index is one-based, formula fields are zero-based.
    DataField dataField = (DataField) formula.getField(responseIndex - 1);
    if (y instanceof RIntegerVector) {
        // An integer-valued y is re-declared as categorical, using the factor levels of y.
        dataField = (DataField) encoder.toCategorical(dataField.getName(), RExpUtil.getFactorLevels(y));
    }
    encoder.setLabel(dataField);
    RStringVector xlevelNames = xlevels.names();
    // Independent variables
    for (int i = 0; i < xlevelNames.size(); i++) {
        String xlevelName = xlevelNames.getValue(i);
        Feature feature = formula.resolveFeature(FieldName.create(xlevelName));
        encoder.addFeature(feature);
    }
}
Also used : ContinuousFeature(org.jpmml.converter.ContinuousFeature) Feature(org.jpmml.converter.Feature) BooleanFeature(org.jpmml.converter.BooleanFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) DataField(org.dmg.pmml.DataField)

Example 37 with Feature

use of org.jpmml.converter.Feature in project jpmml-r by jpmml.

the class RExpEncoder method addFeature.

/**
 * Wraps a field into the feature type that matches its operational type and registers
 * that feature with this encoder.
 *
 * @param field the field to register; a {@code CATEGORICAL} field is cast to {@link DataField}.
 * @throws IllegalArgumentException if the operational type is neither
 *         {@code CATEGORICAL} nor {@code CONTINUOUS}.
 */
public void addFeature(Field<?> field) {
    Feature feature;
    OpType opType = field.getOpType();
    switch(opType) {
        case CATEGORICAL:
            feature = new CategoricalFeature(this, (DataField) field);
            break;
        case CONTINUOUS:
            feature = new ContinuousFeature(this, field);
            break;
        default:
            // Name the offending type so that the failure can be diagnosed from the stack trace alone.
            throw new IllegalArgumentException("Unsupported operational type: " + opType);
    }
    addFeature(feature);
}
Also used : ContinuousFeature(org.jpmml.converter.ContinuousFeature) DataField(org.dmg.pmml.DataField) OpType(org.dmg.pmml.OpType) Feature(org.jpmml.converter.Feature) CategoricalFeature(org.jpmml.converter.CategoricalFeature)

Example 38 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class ImputerModelConverter method encodeFeatures.

/**
 * Attaches a {@link MissingValueDecorator} to the field behind every input column of the
 * {@link ImputerModel}. The replacement value of a column is its entry in the single
 * surrogate row, and the treatment method is derived from the imputation strategy.
 * A configured missing value that is not NaN is additionally registered on the decorator.
 *
 * <p>The returned list holds the input features themselves; the output columns are only
 * checked for having the same count as the input columns.
 *
 * @param encoder the encoder that resolves features and fields and receives the decorators.
 * @return the features of the input columns, in input column order.
 * @throws IllegalArgumentException if the input and output column counts differ, if the
 *         surrogate dataset does not hold exactly one row, or if an input column is not
 *         backed by a {@link DataField}.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    ImputerModel transformer = getTransformer();
    Double missingValue = transformer.getMissingValue();
    String strategy = transformer.getStrategy();
    Dataset<Row> surrogateDF = transformer.surrogateDF();
    String[] inputCols = transformer.getInputCols();
    String[] outputCols = transformer.getOutputCols();
    if (inputCols.length != outputCols.length) {
        throw new IllegalArgumentException("Expected " + inputCols.length + " output column(s), got " + outputCols.length);
    }
    MissingValueTreatmentMethod missingValueTreatmentMethod = parseStrategy(strategy);
    List<Row> surrogateRows = surrogateDF.collectAsList();
    if (surrogateRows.size() != 1) {
        throw new IllegalArgumentException("Expected exactly one surrogate row, got " + surrogateRows.size());
    }
    Row surrogateRow = surrogateRows.get(0);
    List<Feature> result = new ArrayList<>();
    for (int i = 0; i < inputCols.length; i++) {
        String inputCol = inputCols[i];
        Feature feature = encoder.getOnlyFeature(inputCol);
        Field<?> field = encoder.getField(feature.getName());
        // Only a DataField is accepted as the target of the decorator.
        if (!(field instanceof DataField)) {
            throw new IllegalArgumentException("Input column " + inputCol + " is not backed by a DataField");
        }
        Object surrogate = surrogateRow.getAs(inputCol);
        MissingValueDecorator missingValueDecorator = new MissingValueDecorator().setMissingValueReplacement(ValueUtil.formatValue(surrogate)).setMissingValueTreatment(missingValueTreatmentMethod);
        // A NaN (or absent) missing value is not registered as an explicit value.
        if (missingValue != null && !missingValue.isNaN()) {
            missingValueDecorator.addValues(ValueUtil.formatValue(missingValue));
        }
        encoder.addDecorator(feature.getName(), missingValueDecorator);
        result.add(feature);
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) MissingValueDecorator(org.jpmml.converter.MissingValueDecorator) Feature(org.jpmml.converter.Feature) DataField(org.dmg.pmml.DataField) ImputerModel(org.apache.spark.ml.feature.ImputerModel) Row(org.apache.spark.sql.Row) MissingValueTreatmentMethod(org.dmg.pmml.MissingValueTreatmentMethod)

Example 39 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class IndexToStringConverter method encodeFeatures.

/**
 * Declares a categorical string data field whose values are the transformer's labels,
 * and returns it wrapped as the only feature of the result.
 *
 * @param encoder the encoder that creates the data field and owns the feature.
 * @return a one-element list holding the categorical feature.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    IndexToString indexToString = getTransformer();
    DataField labelField = encoder.createDataField(
        formatName(indexToString),
        OpType.CATEGORICAL,
        DataType.STRING,
        Arrays.asList(indexToString.getLabels())
    );
    // Typing the local as Feature lets singletonList infer List<Feature> without a type witness.
    Feature labelFeature = new CategoricalFeature(encoder, labelField);
    return Collections.singletonList(labelFeature);
}
Also used : DataField(org.dmg.pmml.DataField) IndexToString(org.apache.spark.ml.feature.IndexToString) Feature(org.jpmml.converter.Feature) CategoricalFeature(org.jpmml.converter.CategoricalFeature)

Example 40 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class MaxAbsScalerModelConverter method encodeFeatures.

/**
 * Divides every input feature by the matching element of the model's {@code maxAbs} vector.
 * A zero element is replaced by one, and a feature whose divisor is one is passed through
 * unchanged instead of being wrapped into a derived field.
 *
 * @param encoder the encoder that resolves the input features and creates the derived fields.
 * @return one feature per input feature, in input order.
 * @throws IllegalArgumentException if the {@code maxAbs} vector and the input features
 *         differ in size.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    MaxAbsScalerModel transformer = getTransformer();
    List<Feature> features = encoder.getFeatures(transformer.getInputCol());
    Vector maxAbs = transformer.maxAbs();
    if (maxAbs.size() != features.size()) {
        throw new IllegalArgumentException("Expected " + features.size() + " maxAbs element(s), got " + maxAbs.size());
    }
    List<Feature> result = new ArrayList<>();
    for (int i = 0; i < features.size(); i++) {
        Feature feature = features.get(i);
        double maxAbsUnzero = maxAbs.apply(i);
        // Substitute one for a zero divisor, which turns the scaling into a no-op.
        if (maxAbsUnzero == 0d) {
            maxAbsUnzero = 1d;
        }
        // Only emit a derived field when the division would actually change the value.
        if (!ValueUtil.isOne(maxAbsUnzero)) {
            ContinuousFeature continuousFeature = feature.toContinuousFeature();
            Expression expression = PMMLUtil.createApply("/", continuousFeature.ref(), PMMLUtil.createConstant(maxAbsUnzero));
            DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i), OpType.CONTINUOUS, DataType.DOUBLE, expression);
            feature = new ContinuousFeature(encoder, derivedField);
        }
        result.add(feature);
    }
    return result;
}
Also used : ContinuousFeature(org.jpmml.converter.ContinuousFeature) Expression(org.dmg.pmml.Expression) MaxAbsScalerModel(org.apache.spark.ml.feature.MaxAbsScalerModel) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) Vector(org.apache.spark.ml.linalg.Vector) DerivedField(org.dmg.pmml.DerivedField)

Aggregations

Feature (org.jpmml.converter.Feature): 53, ContinuousFeature (org.jpmml.converter.ContinuousFeature): 30, ArrayList (java.util.ArrayList): 27, CategoricalFeature (org.jpmml.converter.CategoricalFeature): 19, DerivedField (org.dmg.pmml.DerivedField): 14, DataField (org.dmg.pmml.DataField): 13, FieldName (org.dmg.pmml.FieldName): 10, Apply (org.dmg.pmml.Apply): 9, BooleanFeature (org.jpmml.converter.BooleanFeature): 9, BinaryFeature (org.jpmml.converter.BinaryFeature): 7, List (java.util.List): 6, Expression (org.dmg.pmml.Expression): 6, SimplePredicate (org.dmg.pmml.SimplePredicate): 6, Vector (org.apache.spark.ml.linalg.Vector): 5, Predicate (org.dmg.pmml.Predicate): 5, Node (org.dmg.pmml.tree.Node): 5, DocumentFeature (org.jpmml.sparkml.DocumentFeature): 5, InteractionFeature (org.jpmml.converter.InteractionFeature): 4, DocumentBuilder (javax.xml.parsers.DocumentBuilder): 3, Transformer (org.apache.spark.ml.Transformer): 3