Search in sources :

Example 31 with Feature

use of org.jpmml.converter.Feature in project jpmml-r by jpmml.

the class Formula method resolveFeature.

/**
 * Resolves a term name to a feature.
 *
 * <p>The name is passed through {@code split(String)}. When that yields exactly one element,
 * the whole name is resolved as a single field. Otherwise every element is resolved on its own
 * and the results are wrapped into an {@link InteractionFeature} of type {@code DataType.DOUBLE}
 * that carries the full, unsplit name.
 *
 * @param name the term name to resolve
 * @return the feature of the single field, or an interaction feature over the component features
 */
public Feature resolveFeature(String name) {
    RExpEncoder encoder = getEncoder();
    List<String> parts = split(name);
    // Single-element case: resolve the complete name directly.
    if (parts.size() == 1) {
        return resolveFeature(FieldName.create(name));
    }
    List<Feature> components = new ArrayList<>();
    for (String part : parts) {
        components.add(resolveFeature(FieldName.create(part)));
    }
    FieldName interactionName = FieldName.create(name);
    return new InteractionFeature(encoder, interactionName, DataType.DOUBLE, components);
}
Also used : InteractionFeature(org.jpmml.converter.InteractionFeature) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) PowerFeature(org.jpmml.converter.PowerFeature) BinaryFeature(org.jpmml.converter.BinaryFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) BooleanFeature(org.jpmml.converter.BooleanFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature)

Example 32 with Feature

use of org.jpmml.converter.Feature in project jpmml-r by jpmml.

the class SVMConverter method encodeNonFormula.

/**
 * Encodes the label and the features of an SVM object without using formula information.
 *
 * <p>The label depends on the SVM type read from the {@code "type"} element:
 * the two regression types get a continuous double {@code "_target"} data field,
 * {@code C_CLASSIFICATION} and {@code NU_CLASSIFICATION} get a categorical string
 * {@code "_target"} data field whose values come from the {@code "levels"} element, and
 * {@code ONE_CLASSIFICATION} gets a {@link ContinuousLabel} with a {@code null} name.
 *
 * <p>One continuous double data field is created per column name of the {@code "SV"} element.
 * The resulting features are passed through {@code scale(...)} before being added to the encoder.
 *
 * @param encoder the encoder that receives the label and the features
 */
private void encodeNonFormula(RExpEncoder encoder) {
    RGenericVector svm = getObject();
    RDoubleVector type = (RDoubleVector) svm.getValue("type");
    RDoubleVector sv = (RDoubleVector) svm.getValue("SV");
    RVector<?> levels = (RVector<?>) svm.getValue("levels");
    int typeIndex = ValueUtil.asInt(type.asScalar());
    Type svmType = Type.values()[typeIndex];
    // Row names are looked up but not used any further in this method.
    RStringVector rowNames = sv.dimnames(0);
    RStringVector columnNames = sv.dimnames(1);
    // Dependent variable
    FieldName targetName = FieldName.create("_target");
    switch (svmType) {
        case EPS_REGRESSION:
        case NU_REGRESSION:
            {
                DataField continuousTarget = encoder.createDataField(targetName, OpType.CONTINUOUS, DataType.DOUBLE);
                encoder.setLabel(continuousTarget);
            }
            break;
        case ONE_CLASSIFICATION:
            {
                encoder.setLabel(new ContinuousLabel(null, DataType.DOUBLE));
            }
            break;
        case C_CLASSIFICATION:
        case NU_CLASSIFICATION:
            {
                RStringVector classLevels = (RStringVector) levels;
                DataField categoricalTarget = encoder.createDataField(targetName, OpType.CATEGORICAL, DataType.STRING, classLevels.getValues());
                encoder.setLabel(categoricalTarget);
            }
            break;
    }
    // Independent variables
    List<Feature> inputs = new ArrayList<>();
    for (int column = 0; column < columnNames.size(); column++) {
        FieldName inputName = FieldName.create(columnNames.getValue(column));
        DataField inputField = encoder.createDataField(inputName, OpType.CONTINUOUS, DataType.DOUBLE);
        inputs.add(new ContinuousFeature(encoder, inputField));
    }
    List<Feature> scaledInputs = scale(inputs, encoder);
    for (Feature scaledInput : scaledInputs) {
        encoder.addFeature(scaledInput);
    }
}
Also used : ArrayList(java.util.ArrayList) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Feature(org.jpmml.converter.Feature) OpType(org.dmg.pmml.OpType) DataType(org.dmg.pmml.DataType) DataField(org.dmg.pmml.DataField) FieldName(org.dmg.pmml.FieldName) ContinuousLabel(org.jpmml.converter.ContinuousLabel)

Example 33 with Feature

use of org.jpmml.converter.Feature in project jpmml-r by jpmml.

the class SVMConverter method encodeFormula.

/**
 * Encodes the label and the features of an SVM object using its formula ({@code "terms"}) information.
 *
 * <p>A {@link Formula} is built from the {@code "terms"} element, with category levels supplied
 * from the {@code "xlevels"} element. The scalar value of the {@code "response"} attribute of the
 * terms selects the label:
 * <ul>
 *   <li>zero: only accepted for {@code ONE_CLASSIFICATION}; the label becomes a
 *       {@link ContinuousLabel} with a {@code null} name;</li>
 *   <li>non-zero: the formula field at position {@code response - 1} becomes the label, converted
 *       to categorical for {@code C_CLASSIFICATION}/{@code NU_CLASSIFICATION}, and required to be
 *       continuous for {@code ONE_CLASSIFICATION}.</li>
 * </ul>
 *
 * <p>Each column name of the {@code "SV"} element is resolved to a feature through the formula.
 * The features are passed through {@code scale(...)} before being added to the encoder.
 *
 * @param encoder the encoder that receives the label and the features
 * @throws IllegalArgumentException if reading the {@code "xlevels"} element fails with an
 *         {@code IllegalArgumentException}, if there is no response for an SVM type other than
 *         {@code ONE_CLASSIFICATION}, or if the response field of a {@code ONE_CLASSIFICATION}
 *         model is not continuous
 */
private void encodeFormula(RExpEncoder encoder) {
    RGenericVector svm = getObject();
    RDoubleVector type = (RDoubleVector) svm.getValue("type");
    RDoubleVector sv = (RDoubleVector) svm.getValue("SV");
    RVector<?> levels = (RVector<?>) svm.getValue("levels");
    RExp terms = svm.getValue("terms");
    final RGenericVector xlevels;
    try {
        xlevels = (RGenericVector) svm.getValue("xlevels");
    } catch (IllegalArgumentException iae) {
        throw new IllegalArgumentException("No variable levels information. Please initialize the \'xlevels\' element", iae);
    }
    int typeIndex = ValueUtil.asInt(type.asScalar());
    Type svmType = Type.values()[typeIndex];
    // Row names are looked up but not used any further in this method.
    RStringVector rowNames = sv.dimnames(0);
    RStringVector columnNames = sv.dimnames(1);
    RIntegerVector response = (RIntegerVector) terms.getAttributeValue("response");
    FormulaContext context = new FormulaContext() {

        @Override
        public List<String> getCategories(String variable) {
            // Variables without an "xlevels" entry have no category information.
            if (!xlevels.hasValue(variable)) {
                return null;
            }
            RStringVector variableLevels = (RStringVector) xlevels.getValue(variable);
            return variableLevels.getValues();
        }

        @Override
        public RGenericVector getData() {
            return null;
        }
    };
    Formula formula = FormulaUtil.createFormula(terms, context, encoder);
    // Dependent variable
    int responseIndex = response.asScalar();
    if (responseIndex == 0) {
        // No response term: only acceptable for one-classification.
        if (svmType != Type.ONE_CLASSIFICATION) {
            throw new IllegalArgumentException();
        }
        encoder.setLabel(new ContinuousLabel(null, DataType.DOUBLE));
    } else {
        DataField targetField = (DataField) formula.getField(responseIndex - 1);
        if (svmType == Type.C_CLASSIFICATION || svmType == Type.NU_CLASSIFICATION) {
            RStringVector classLevels = (RStringVector) levels;
            targetField = (DataField) encoder.toCategorical(targetField.getName(), classLevels.getValues());
        } else if (svmType == Type.ONE_CLASSIFICATION) {
            if (!(OpType.CONTINUOUS).equals(targetField.getOpType())) {
                throw new IllegalArgumentException();
            }
        }
        encoder.setLabel(targetField);
    }
    // Independent variables
    List<Feature> inputs = new ArrayList<>();
    for (int column = 0; column < columnNames.size(); column++) {
        inputs.add(formula.resolveFeature(columnNames.getValue(column)));
    }
    List<Feature> scaledInputs = scale(inputs, encoder);
    for (Feature scaledInput : scaledInputs) {
        encoder.addFeature(scaledInput);
    }
}
Also used : ArrayList(java.util.ArrayList) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Feature(org.jpmml.converter.Feature) OpType(org.dmg.pmml.OpType) DataType(org.dmg.pmml.DataType) DataField(org.dmg.pmml.DataField) ContinuousLabel(org.jpmml.converter.ContinuousLabel)

Example 34 with Feature

use of org.jpmml.converter.Feature in project jpmml-r by jpmml.

the class ScorecardConverter method encodeModel.

/**
 * Encodes a GLM object that carries a scorecard configuration as a PMML {@link Scorecard}.
 *
 * <p>Every schema feature must be a {@link BinaryFeature}. Features that share a name are grouped
 * into one {@link Characteristic}; each feature contributes an attribute whose partial score is
 * {@code -1 * coefficient * factor}, where {@code factor = pdo / ln(2)}. Every characteristic is
 * closed with a catch-all attribute ({@link True} predicate) scoring zero.
 *
 * <p>The initial score is
 * {@code base_points - ln(odds) * factor - intercept * factor}, with the intercept term omitted
 * when there is no intercept coefficient.
 *
 * @param schema the schema supplying the label and the features
 * @return the scorecard model, with reason codes disabled
 * @throws IllegalArgumentException if reading the {@code "sc.conf"} element fails with an
 *         {@code IllegalArgumentException}, if the number of coefficients does not match the
 *         number of features (plus one when an intercept is present), or if a feature is not a
 *         {@code BinaryFeature}
 */
@Override
public Scorecard encodeModel(Schema schema) {
    RGenericVector glm = getObject();
    RDoubleVector coefficients = (RDoubleVector) glm.getValue("coefficients");
    RGenericVector family = (RGenericVector) glm.getValue("family");
    RGenericVector scConf;
    try {
        scConf = (RGenericVector) glm.getValue("sc.conf");
    } catch (IllegalArgumentException iae) {
        throw new IllegalArgumentException("No scorecard configuration information. Please initialize the \'sc.conf\' element", iae);
    }
    Double intercept = coefficients.getValue(LMConverter.INTERCEPT, true);
    List<? extends Feature> features = schema.getFeatures();
    // The intercept, when present, accounts for one extra coefficient.
    int expectedCoefficients = features.size() + (intercept != null ? 1 : 0);
    if (coefficients.size() != expectedCoefficients) {
        throw new IllegalArgumentException();
    }
    RNumberVector<?> odds = (RNumberVector<?>) scConf.getValue("odds");
    RNumberVector<?> basePoints = (RNumberVector<?>) scConf.getValue("base_points");
    RNumberVector<?> pdo = (RNumberVector<?>) scConf.getValue("pdo");
    double factor = (pdo.asScalar()).doubleValue() / Math.log(2);
    // Insertion-ordered, so characteristics come out in the order their fields are first seen.
    Map<FieldName, Characteristic> characteristicsByField = new LinkedHashMap<>();
    for (Feature feature : features) {
        FieldName fieldName = feature.getName();
        if (!(feature instanceof BinaryFeature)) {
            throw new IllegalArgumentException();
        }
        Double coefficient = getFeatureCoefficient(feature, coefficients);
        Characteristic fieldCharacteristic = characteristicsByField.get(fieldName);
        if (fieldCharacteristic == null) {
            fieldCharacteristic = new Characteristic()
                .setName(FeatureUtil.createName("score", feature));
            characteristicsByField.put(fieldName, fieldCharacteristic);
        }
        BinaryFeature binaryFeature = (BinaryFeature) feature;
        SimplePredicate equalsValue = new SimplePredicate()
            .setField(binaryFeature.getName())
            .setOperator(SimplePredicate.Operator.EQUAL)
            .setValue(binaryFeature.getValue());
        Attribute scoredAttribute = new Attribute()
            .setPartialScore(formatScore(-1d * coefficient * factor))
            .setPredicate(equalsValue);
        fieldCharacteristic.addAttributes(scoredAttribute);
    }
    Characteristics characteristics = new Characteristics();
    for (Characteristic fieldCharacteristic : characteristicsByField.values()) {
        // Catch-all attribute scoring zero, appended after the value-specific attributes.
        Attribute fallbackAttribute = new Attribute()
            .setPartialScore(0d)
            .setPredicate(new True());
        fieldCharacteristic.addAttributes(fallbackAttribute);
        characteristics.addCharacteristics(fieldCharacteristic);
    }
    Scorecard scorecard = new Scorecard(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()), characteristics);
    double base = (basePoints.asScalar()).doubleValue();
    double logOdds = Math.log((odds.asScalar()).doubleValue());
    double interceptPoints = (intercept != null ? intercept * factor : 0);
    double initialScore = base - logOdds * factor - interceptPoints;
    return scorecard
        .setInitialScore(formatScore(initialScore))
        .setUseReasonCodes(false);
}
Also used : Attribute(org.dmg.pmml.scorecard.Attribute) Characteristic(org.dmg.pmml.scorecard.Characteristic) True(org.dmg.pmml.True) BinaryFeature(org.jpmml.converter.BinaryFeature) Feature(org.jpmml.converter.Feature) SimplePredicate(org.dmg.pmml.SimplePredicate) LinkedHashMap(java.util.LinkedHashMap) Characteristics(org.dmg.pmml.scorecard.Characteristics) Scorecard(org.dmg.pmml.scorecard.Scorecard) FieldName(org.dmg.pmml.FieldName) Map(java.util.Map)

Example 35 with Feature

use of org.jpmml.converter.Feature in project jpmml-r by jpmml.

the class XGBoostConverter method encodeSchema.

/**
 * Encodes the label and the features of an XGBoost booster object.
 *
 * <p>The feature map is loaded from the mandatory {@code "fmap"} element. The optional
 * {@code "schema"} element may supply a {@code "missing"} value (registered with the feature map),
 * a {@code "response_name"} (replaces the default target name {@code "_target"}) and
 * {@code "response_levels"} (target categories). The label is produced by the learner's objective
 * function; the features are produced by the feature map.
 *
 * @param encoder the encoder that receives the label and the features
 * @throws IllegalArgumentException if reading the {@code "fmap"} element fails with an
 *         {@code IllegalArgumentException}, or if loading the feature map fails with an
 *         {@code IOException}; the original exception is attached as the cause in both cases
 */
@Override
public void encodeSchema(RExpEncoder encoder) {
    RGenericVector booster = getObject();
    RGenericVector schema = (RGenericVector) booster.getValue("schema", true);
    RVector<?> fmap;
    try {
        fmap = (RVector<?>) booster.getValue("fmap");
    } catch (IllegalArgumentException iae) {
        // Chain the original exception so the underlying lookup failure stays in the stack trace.
        throw new IllegalArgumentException("No feature map information. Please initialize the \'fmap\' element", iae);
    }
    FeatureMap featureMap;
    try {
        featureMap = loadFeatureMap(fmap);
    } catch (IOException ioe) {
        throw new IllegalArgumentException(ioe);
    }
    if (schema != null) {
        RVector<?> missing = (RVector<?>) schema.getValue("missing", true);
        if (missing != null) {
            featureMap.addMissingValue(ValueUtil.formatValue(missing.asScalar()));
        }
    }
    Learner learner = ensureLearner();
    // Dependent variable
    {
        ObjFunction obj = learner.getObj();
        // Defaults used when the schema does not supply response information.
        FieldName targetField = FieldName.create("_target");
        List<String> targetCategories = null;
        if (schema != null) {
            RStringVector responseName = (RStringVector) schema.getValue("response_name", true);
            RStringVector responseLevels = (RStringVector) schema.getValue("response_levels", true);
            if (responseName != null) {
                targetField = FieldName.create(responseName.asScalar());
            }
            if (responseLevels != null) {
                targetCategories = responseLevels.getValues();
            }
        }
        Label label = obj.encodeLabel(targetField, targetCategories, encoder);
        encoder.setLabel(label);
    }
    // Independent variables
    {
        List<Feature> features = featureMap.encodeFeatures(encoder);
        for (Feature feature : features) {
            encoder.addFeature(feature);
        }
    }
}
Also used : Label(org.jpmml.converter.Label) IOException(java.io.IOException) Feature(org.jpmml.converter.Feature) Learner(org.jpmml.xgboost.Learner) FeatureMap(org.jpmml.xgboost.FeatureMap) List(java.util.List) FieldName(org.dmg.pmml.FieldName) ObjFunction(org.jpmml.xgboost.ObjFunction)

Aggregations

Feature (org.jpmml.converter.Feature) 53, ContinuousFeature (org.jpmml.converter.ContinuousFeature) 30, ArrayList (java.util.ArrayList) 27, CategoricalFeature (org.jpmml.converter.CategoricalFeature) 19, DerivedField (org.dmg.pmml.DerivedField) 14, DataField (org.dmg.pmml.DataField) 13, FieldName (org.dmg.pmml.FieldName) 10, Apply (org.dmg.pmml.Apply) 9, BooleanFeature (org.jpmml.converter.BooleanFeature) 9, BinaryFeature (org.jpmml.converter.BinaryFeature) 7, List (java.util.List) 6, Expression (org.dmg.pmml.Expression) 6, SimplePredicate (org.dmg.pmml.SimplePredicate) 6, Vector (org.apache.spark.ml.linalg.Vector) 5, Predicate (org.dmg.pmml.Predicate) 5, Node (org.dmg.pmml.tree.Node) 5, DocumentFeature (org.jpmml.sparkml.DocumentFeature) 5, InteractionFeature (org.jpmml.converter.InteractionFeature) 4, DocumentBuilder (javax.xml.parsers.DocumentBuilder) 3, Transformer (org.apache.spark.ml.Transformer) 3