Search in sources :

Example 66 with Feature

use of org.jpmml.converter.Feature in project jpmml-r by jpmml.

The method encodeSchema of the class XGBoostConverter.

/**
 * Registers the label and the features of the wrapped booster object with the given encoder.
 *
 * <p>The booster elements "feature_names" and "schema" are both looked up with the flag
 * {@code false}, and a {@code null} result is treated as "not present". When a schema is
 * present, its "missing", "response_name" and "response_levels" elements are consulted the
 * same way.
 *
 * @param encoder the encoder that receives the label and the features
 */
@Override
public void encodeSchema(RExpEncoder encoder) {
    RGenericVector booster = getObject();

    RStringVector featureNames = booster.getStringElement("feature_names", false);
    RGenericVector schema = booster.getGenericElement("schema", false);
    boolean hasSchema = (schema != null);

    FeatureMap featureMap = ensureFeatureMap();
    if (featureNames != null) {
        checkFeatureMap(featureMap, featureNames);
    }

    // The missing value marker is registered before the learner is obtained.
    RVector<?> missing = hasSchema ? schema.getVectorElement("missing", false) : null;
    if (missing != null) {
        featureMap.addMissingValue(ValueUtil.asString(missing.asScalar()));
    }

    Learner learner = ensureLearner();
    ObjFunction objective = learner.obj();

    RStringVector responseName = hasSchema ? schema.getStringElement("response_name", false) : null;
    RStringVector responseLevels = hasSchema ? schema.getStringElement("response_levels", false) : null;

    // Fall back to the placeholder target name and to no categories when the schema is silent.
    String targetField = (responseName != null) ? responseName.asScalar() : "_target";
    List<String> targetCategories = (responseLevels != null) ? responseLevels.getValues() : null;

    encoder.setLabel(objective.encodeLabel(targetField, targetCategories, encoder));

    for (Feature encodedFeature : featureMap.encodeFeatures(encoder)) {
        encoder.addFeature(encodedFeature);
    }
}
Also used : FeatureMap(org.jpmml.xgboost.FeatureMap) Label(org.jpmml.converter.Label) RGenericVector(org.jpmml.rexp.RGenericVector) Feature(org.jpmml.converter.Feature) Learner(org.jpmml.xgboost.Learner) RStringVector(org.jpmml.rexp.RStringVector) ObjFunction(org.jpmml.xgboost.ObjFunction)

Example 67 with Feature

use of org.jpmml.converter.Feature in project jpmml-r by jpmml.

The method encodeSchema of the class EarthConverter.

/**
 * Registers the label and one feature per selected term of the wrapped earth object.
 *
 * <p>For every selected term except the first one (the loop starts at index 1; presumably the
 * first selected term is the intercept - confirm against the earth object layout), the matching
 * rows of the "dirs" and "cuts" matrices are scanned predictor by predictor:
 * <ul>
 *   <li>direction {@code 0}: the predictor does not take part in the term and is skipped;</li>
 *   <li>direction {@code -1} or {@code 1}: the predictor is wrapped into a derived hinge
 *       function field;</li>
 *   <li>direction {@code 2}: the predictor is used as is.</li>
 * </ul>
 * A term with a single participating predictor yields that feature; a term with several yields
 * an {@link InteractionFeature} named after the term's own row of "dirs".
 *
 * @param encoder the encoder that receives the label and the features
 * @throws IllegalArgumentException if the dimnames of "dirs" and "cuts" differ, if a direction
 *         value is not one of -1, 0, 1, 2, or if a selected term has no participating predictor
 */
@Override
public void encodeSchema(RExpEncoder encoder) {
    RGenericVector earth = getObject();
    RDoubleVector dirs = earth.getDoubleElement("dirs");
    RDoubleVector cuts = earth.getDoubleElement("cuts");
    RDoubleVector selectedTerms = earth.getDoubleElement("selected.terms");
    RDoubleVector coefficients = earth.getDoubleElement("coefficients");
    RExp terms = earth.getElement("terms");
    RGenericVector xlevels = DecorationUtil.getGenericElement(earth, "xlevels");
    RStringVector dirsRows = dirs.dimnames(0);
    RStringVector dirsColumns = dirs.dimnames(1);
    RStringVector cutsRows = cuts.dimnames(0);
    RStringVector cutsColumns = cuts.dimnames(1);
    // Both matrices are indexed with the same (row, column) pairs below, so they must agree.
    if (!(dirsRows.getValues()).equals(cutsRows.getValues()) || !(dirsColumns.getValues()).equals(cutsColumns.getValues())) {
        throw new IllegalArgumentException("The dimnames of the dirs and cuts matrices do not match");
    }
    int rows = dirsRows.size();
    int columns = dirsColumns.size();
    List<String> predictorNames = dirsColumns.getValues();
    FormulaContext context = new XLevelsFormulaContext(xlevels);
    Formula formula = FormulaUtil.createFormula(terms, context, encoder);
    {
        RStringVector yNames = coefficients.dimnames(1);
        DataField dataField = (DataField) encoder.getField(yNames.asScalar());
        encoder.setLabel(dataField);
    }
    for (int i = 1; i < selectedTerms.size(); i++) {
        // Selected term numbers are one-based row indices into dirs/cuts.
        int termIndex = ValueUtil.asInt(selectedTerms.getValue(i)) - 1;
        List<Double> dirsRow = FortranMatrixUtil.getRow(dirs.getValues(), rows, columns, termIndex);
        List<Double> cutsRow = FortranMatrixUtil.getRow(cuts.getValues(), rows, columns, termIndex);
        List<Feature> features = new ArrayList<>();
        predictors: for (int j = 0; j < predictorNames.size(); j++) {
            String predictorName = predictorNames.get(j);
            int dir = ValueUtil.asInt(dirsRow.get(j));
            double cut = cutsRow.get(j);
            if (dir == 0) {
                continue predictors;
            }
            Feature feature = formula.resolveComplexFeature(predictorName);
            switch(dir) {
                case -1:
                case 1:
                    {
                        ContinuousFeature continuousFeature = feature.toContinuousFeature();
                        DerivedField derivedField = encoder.ensureDerivedField(formatHingeFunction(dir, continuousFeature, cut), OpType.CONTINUOUS, DataType.DOUBLE, () -> createHingeFunction(dir, continuousFeature, cut));
                        feature = new ContinuousFeature(encoder, derivedField);
                    }
                    break;
                case 2:
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported direction " + dir + " for predictor " + predictorName);
            }
            features.add(feature);
        }
        Feature feature;
        if (features.size() == 1) {
            feature = features.get(0);
        } else if (features.size() > 1) {
            // Name the interaction after the row that was actually read (termIndex). The loop
            // counter i is a position in selected.terms, which diverges from the dirs row
            // index as soon as any term has been pruned.
            feature = new InteractionFeature(encoder, dirsRows.getValue(termIndex), DataType.DOUBLE, features);
        } else {
            throw new IllegalArgumentException("Selected term at row index " + termIndex + " does not reference any predictor");
        }
        encoder.addFeature(feature);
    }
}
Also used : InteractionFeature(org.jpmml.converter.InteractionFeature) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) InteractionFeature(org.jpmml.converter.InteractionFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) DataField(org.dmg.pmml.DataField) DerivedField(org.dmg.pmml.DerivedField)

Example 68 with Feature

use of org.jpmml.converter.Feature in project jpmml-r by jpmml.

The method encodeNode of the class BinaryTreeConverter.

/**
 * Recursively converts one tree node, and everything below it, into a PMML node.
 *
 * <p>A node whose "terminal" flag is {@code TRUE} becomes a {@link LeafNode} that is passed
 * through {@code encodeScore}. Any other node becomes a {@link BranchNode} with exactly two
 * children, built from the "left" and "right" elements. The child predicates are derived from
 * the primary split ("psplit"): a value selection for a categorical feature, or a
 * {@code <=} / {@code >} pair on the split point otherwise.
 *
 * @param tree the R list describing the node
 * @param predicate the predicate that leads into this node
 * @param schema the schema used to look up the split feature
 * @return the encoded node
 * @throws IllegalArgumentException if the node carries entries in "ssplits", or if the split
 *         variable name has no entry in the feature index map
 */
private Node encodeNode(RGenericVector tree, Predicate predicate, Schema schema) {
    RIntegerVector nodeId = tree.getIntegerElement("nodeID");
    RBooleanVector terminal = tree.getBooleanElement("terminal");
    RGenericVector psplit = tree.getGenericElement("psplit");
    RGenericVector ssplits = tree.getGenericElement("ssplits");
    RDoubleVector prediction = tree.getDoubleElement("prediction");
    RGenericVector left = tree.getGenericElement("left");
    RGenericVector right = tree.getGenericElement("right");

    Integer id = nodeId.asScalar();

    boolean isLeaf = (Boolean.TRUE).equals(terminal.asScalar());
    if (isLeaf) {
        Node leaf = new LeafNode(null, predicate).setId(id);
        return encodeScore(leaf, prediction, schema);
    }

    RNumberVector<?> splitpoint = psplit.getNumericElement("splitpoint");
    RStringVector variableName = psplit.getStringElement("variableName");

    // Only nodes with an empty "ssplits" element are accepted.
    if (ssplits.size() > 0) {
        throw new IllegalArgumentException();
    }

    String splitVariable = variableName.asScalar();
    Integer featureIndex = this.featureIndexes.get(splitVariable);
    if (featureIndex == null) {
        throw new IllegalArgumentException();
    }

    Feature feature = schema.getFeature(featureIndex);

    Predicate leftPredicate;
    Predicate rightPredicate;
    if (!(feature instanceof CategoricalFeature)) {
        ContinuousFeature continuousFeature = feature.toContinuousFeature();
        Number threshold = splitpoint.asScalar();
        leftPredicate = createSimplePredicate(continuousFeature, SimplePredicate.Operator.LESS_OR_EQUAL, threshold);
        rightPredicate = createSimplePredicate(continuousFeature, SimplePredicate.Operator.GREATER_THAN, threshold);
    } else {
        CategoricalFeature categoricalFeature = (CategoricalFeature) feature;
        List<?> categories = categoricalFeature.getValues();
        List<Integer> selection = (List<Integer>) splitpoint.getValues();
        // The boolean flag picks which side of the split the selected values describe.
        leftPredicate = createPredicate(categoricalFeature, selectValues(categories, selection, true));
        rightPredicate = createPredicate(categoricalFeature, selectValues(categories, selection, false));
    }

    Node leftChild = encodeNode(left, leftPredicate, schema);
    Node rightChild = encodeNode(right, rightPredicate, schema);

    return new BranchNode(null, predicate).setId(id).addNodes(leftChild, rightChild);
}
Also used : Node(org.dmg.pmml.tree.Node) ClassifierNode(org.dmg.pmml.tree.ClassifierNode) BranchNode(org.dmg.pmml.tree.BranchNode) LeafNode(org.dmg.pmml.tree.LeafNode) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Feature(org.jpmml.converter.Feature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) Predicate(org.dmg.pmml.Predicate) SimplePredicate(org.dmg.pmml.SimplePredicate) BranchNode(org.dmg.pmml.tree.BranchNode) ContinuousFeature(org.jpmml.converter.ContinuousFeature) LeafNode(org.dmg.pmml.tree.LeafNode) ArrayList(java.util.ArrayList) List(java.util.List)

Example 69 with Feature

use of org.jpmml.converter.Feature in project jpmml-r by jpmml.

The method addField of the class Formula.

/**
 * Registers a field with this formula, together with a feature under the field's name.
 *
 * <p>The feature defaults to a {@link ContinuousFeature} over the field. If the field is a
 * {@link DerivedField} whose expression is an {@link Apply} accepted by
 * {@code checkApply(apply, PMMLFunctions.POW, FieldRef.class, Constant.class)}, and the
 * constant parses as an {@code int}, a {@link PowerFeature} over the referenced field is
 * registered instead.
 *
 * @param field the field to register
 */
public void addField(Field<?> field) {
    RExpEncoder encoder = getEncoder();

    Feature feature = new ContinuousFeature(encoder, field);

    if (field instanceof DerivedField) {
        Expression expression = ((DerivedField) field).requireExpression();

        if ((expression instanceof Apply) && checkApply((Apply) expression, PMMLFunctions.POW, FieldRef.class, Constant.class)) {
            List<Expression> arguments = ((Apply) expression).getExpressions();
            FieldRef base = (FieldRef) arguments.get(0);
            Constant exponent = (Constant) arguments.get(1);

            try {
                int power = Integer.parseInt(ValueUtil.asString(exponent.getValue()));

                feature = new PowerFeature(encoder, base.requireField(), DataType.DOUBLE, power);
            } catch (NumberFormatException ignored) {
                // A non-integer exponent is not an error: the continuous feature created above is kept.
            }
        }
    }

    putFeature(field.requireName(), feature);
    this.fields.add(field);
}
Also used : PowerFeature(org.jpmml.converter.PowerFeature) FieldRef(org.dmg.pmml.FieldRef) Apply(org.dmg.pmml.Apply) Constant(org.dmg.pmml.Constant) Feature(org.jpmml.converter.Feature) PowerFeature(org.jpmml.converter.PowerFeature) BinaryFeature(org.jpmml.converter.BinaryFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) BooleanFeature(org.jpmml.converter.BooleanFeature) InteractionFeature(org.jpmml.converter.InteractionFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Expression(org.dmg.pmml.Expression) DerivedField(org.dmg.pmml.DerivedField)

Aggregations

Feature (org.jpmml.converter.Feature)69 ContinuousFeature (org.jpmml.converter.ContinuousFeature)39 ArrayList (java.util.ArrayList)30 CategoricalFeature (org.jpmml.converter.CategoricalFeature)29 DerivedField (org.dmg.pmml.DerivedField)16 DataField (org.dmg.pmml.DataField)12 SimplePredicate (org.dmg.pmml.SimplePredicate)12 List (java.util.List)11 Predicate (org.dmg.pmml.Predicate)11 Node (org.dmg.pmml.tree.Node)11 BooleanFeature (org.jpmml.converter.BooleanFeature)11 Apply (org.dmg.pmml.Apply)9 BinaryFeature (org.jpmml.converter.BinaryFeature)9 Expression (org.dmg.pmml.Expression)7 FieldName (org.dmg.pmml.FieldName)7 CategoricalLabel (org.jpmml.converter.CategoricalLabel)7 InteractionFeature (org.jpmml.converter.InteractionFeature)6 Vector (org.apache.spark.ml.linalg.Vector)5 BranchNode (org.dmg.pmml.tree.BranchNode)5 LeafNode (org.dmg.pmml.tree.LeafNode)5