Search in sources :

Example 16 with DataField

use of org.dmg.pmml.DataField in project jpmml-r by jpmml.

the class KMeansConverter method encodeSchema.

/**
 * Creates one continuous, double-typed data field for every name found in
 * dimension 1 of the model's "centers" matrix, and registers each of those
 * fields as a feature on the encoder.
 *
 * @param encoder the encoder that receives the data fields and features
 */
@Override
public void encodeSchema(RExpEncoder encoder) {
    RGenericVector model = getObject();
    RDoubleVector clusterCenters = (RDoubleVector) model.getValue("centers");
    // dimnames(1) is what this method treats as the column names of the matrix
    RStringVector centerColumns = clusterCenters.dimnames(1);
    for (int index = 0; index < centerColumns.size(); index++) {
        FieldName fieldName = FieldName.create(centerColumns.getValue(index));
        DataField inputField = encoder.createDataField(fieldName, OpType.CONTINUOUS, DataType.DOUBLE);
        encoder.addFeature(inputField);
    }
}
Also used : DataField(org.dmg.pmml.DataField)

Example 17 with DataField

use of org.dmg.pmml.DataField in project jpmml-r by jpmml.

the class BinaryTreeConverter method encodeVariableList.

/**
 * Recursively walks a tree node and its "left" and "right" children, making
 * sure that every split variable is backed by a data field on the encoder.
 *
 * <p>A variable that the encoder does not know yet is created, registered as
 * a feature, and assigned the next free position in {@code featureIndexes}.
 * An integer splitpoint yields a categorical field whose categories come from
 * the splitpoint's "levels" attribute; a double splitpoint yields a
 * continuous double field.</p>
 *
 * @param tree the node to process; nodes whose "terminal" flag is true are skipped
 * @param encoder the encoder that holds the data fields and features
 * @throws IllegalArgumentException if the splitpoint is neither an integer nor a double vector
 */
private void encodeVariableList(RGenericVector tree, RExpEncoder encoder) {
    RBooleanVector terminal = (RBooleanVector) tree.getValue("terminal");
    RGenericVector psplit = (RGenericVector) tree.getValue("psplit");
    RGenericVector left = (RGenericVector) tree.getValue("left");
    RGenericVector right = (RGenericVector) tree.getValue("right");
    // Terminal nodes have no split to inspect, and the recursion stops here
    if ((Boolean.TRUE).equals(terminal.asScalar())) {
        return;
    }
    RNumberVector<?> splitpoint = (RNumberVector<?>) psplit.getValue("splitpoint");
    RStringVector variableName = (RStringVector) psplit.getValue("variableName");
    FieldName name = FieldName.create(variableName.asScalar());
    DataField dataField = encoder.getDataField(name);
    // Only the first occurrence of a variable creates a field and claims a feature index
    if (dataField == null) {
        if (splitpoint instanceof RIntegerVector) {
            RStringVector levels = (RStringVector) splitpoint.getAttributeValue("levels");
            dataField = encoder.createDataField(name, OpType.CATEGORICAL, null, levels.getValues());
        } else if (splitpoint instanceof RDoubleVector) {
            dataField = encoder.createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE);
        } else {
            String splitpointType = (splitpoint != null ? splitpoint.getClass().getName() : "null");
            throw new IllegalArgumentException("Split variable '" + variableName.asScalar() + "' has an unsupported splitpoint type: " + splitpointType);
        }
        encoder.addFeature(dataField);
        this.featureIndexes.put(name, this.featureIndexes.size());
    }
    encodeVariableList(left, encoder);
    encodeVariableList(right, encoder);
}
Also used : DataField(org.dmg.pmml.DataField) FieldName(org.dmg.pmml.FieldName)

Example 18 with DataField

use of org.dmg.pmml.DataField in project jpmml-r by jpmml.

the class EarthConverter method encodeSchema.

/**
 * Encodes the label and the features of the model from its "dirs", "cuts",
 * "selected.terms", "coefficients", "terms" and "xlevels" elements.
 *
 * <p>The label is the field named by dimension 1 of "coefficients". Every
 * selected term except the first one becomes a feature: for each predictor
 * with a non-zero direction, directions -1 and 1 are turned into a derived
 * hinge-function field, while direction 2 uses the predictor's feature as is.
 * A term with one such predictor is added directly; a term with several is
 * wrapped into an {@link InteractionFeature}.</p>
 *
 * @param encoder the encoder that receives the label, derived fields and features
 * @throws IllegalArgumentException if "xlevels" cannot be read, if the dimension
 *         names of "dirs" and "cuts" differ, if a direction value is not one of
 *         -1, 0, 1 or 2, or if a selected term references no predictor
 */
@Override
public void encodeSchema(RExpEncoder encoder) {
    RGenericVector earth = getObject();
    RDoubleVector dirs = (RDoubleVector) earth.getValue("dirs");
    RDoubleVector cuts = (RDoubleVector) earth.getValue("cuts");
    RDoubleVector selectedTerms = (RDoubleVector) earth.getValue("selected.terms");
    RDoubleVector coefficients = (RDoubleVector) earth.getValue("coefficients");
    RExp terms = earth.getValue("terms");
    final RGenericVector xlevels;
    try {
        xlevels = (RGenericVector) earth.getValue("xlevels");
    } catch (IllegalArgumentException iae) {
        throw new IllegalArgumentException("No variable levels information. Please initialize the \'xlevels\' element", iae);
    }
    RStringVector dirsRows = dirs.dimnames(0);
    RStringVector dirsColumns = dirs.dimnames(1);
    RStringVector cutsRows = cuts.dimnames(0);
    RStringVector cutsColumns = cuts.dimnames(1);
    // Both matrices are indexed with the same row/column positions below, so they must be laid out identically
    if (!(dirsRows.getValues()).equals(cutsRows.getValues()) || !(dirsColumns.getValues()).equals(cutsColumns.getValues())) {
        throw new IllegalArgumentException("The dimension names of the 'dirs' and 'cuts' elements do not match");
    }
    int rows = dirsRows.size();
    int columns = dirsColumns.size();
    List<String> predictorNames = dirsColumns.getValues();
    FormulaContext context = new FormulaContext() {

        @Override
        public List<String> getCategories(String variable) {
            if (xlevels.hasValue(variable)) {
                RStringVector levels = (RStringVector) xlevels.getValue(variable);
                return levels.getValues();
            }
            return null;
        }

        @Override
        public RGenericVector getData() {
            return null;
        }
    };
    Formula formula = FormulaUtil.createFormula(terms, context, encoder);
    // Dependent variable
    {
        RStringVector yNames = coefficients.dimnames(1);
        FieldName name = FieldName.create(yNames.asScalar());
        DataField dataField = (DataField) encoder.getField(name);
        encoder.setLabel(dataField);
    }
    // Independent variables
    // The first selected term is skipped (presumably the intercept - TODO confirm)
    for (int i = 1; i < selectedTerms.size(); i++) {
        // Selected terms are one-based row numbers into "dirs" and "cuts"
        int termIndex = ValueUtil.asInt(selectedTerms.getValue(i)) - 1;
        List<Double> dirsRow = FortranMatrixUtil.getRow(dirs.getValues(), rows, columns, termIndex);
        List<Double> cutsRow = FortranMatrixUtil.getRow(cuts.getValues(), rows, columns, termIndex);
        List<Feature> features = new ArrayList<>();
        predictors: for (int j = 0; j < predictorNames.size(); j++) {
            String predictorName = predictorNames.get(j);
            int dir = ValueUtil.asInt(dirsRow.get(j));
            double cut = cutsRow.get(j);
            // A zero direction means that the predictor does not take part in this term
            if (dir == 0) {
                continue predictors;
            }
            Feature feature = formula.resolveFeature(predictorName);
            switch(dir) {
                case -1:
                case 1:
                    {
                        feature = feature.toContinuousFeature();
                        FieldName name = FieldName.create(formatHingeFunction(dir, feature, cut));
                        // Reuse the derived field if an identical hinge function was already encoded
                        DerivedField derivedField = encoder.getDerivedField(name);
                        if (derivedField == null) {
                            Apply apply = createHingeFunction(dir, feature, cut);
                            derivedField = encoder.createDerivedField(name, OpType.CONTINUOUS, DataType.DOUBLE, apply);
                        }
                        feature = new ContinuousFeature(encoder, derivedField);
                    }
                    break;
                case 2:
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported direction " + dir + " for predictor '" + predictorName + "'");
            }
            features.add(feature);
        }
        Feature feature;
        if (features.size() == 1) {
            feature = features.get(0);
        } else if (features.size() > 1) {
            // Name the interaction after the row that was actually read (termIndex), not after the
            // position within "selected.terms"; the two differ as soon as some term has been pruned
            feature = new InteractionFeature(encoder, FieldName.create(dirsRows.getValue(termIndex)), DataType.DOUBLE, features);
        } else {
            throw new IllegalArgumentException("Row " + (termIndex + 1) + " of the 'dirs' element does not reference any predictor");
        }
        encoder.addFeature(feature);
    }
}
Also used : InteractionFeature(org.jpmml.converter.InteractionFeature) Apply(org.dmg.pmml.Apply) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) InteractionFeature(org.jpmml.converter.InteractionFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) DataField(org.dmg.pmml.DataField) FieldName(org.dmg.pmml.FieldName) DerivedField(org.dmg.pmml.DerivedField)

Example 19 with DataField

use of org.dmg.pmml.DataField in project jpmml-r by jpmml.

the class SVMConverter method encodeNonFormula.

/**
 * Encodes the label and the features directly from the model's "type", "SV"
 * and "levels" elements.
 *
 * <p>The label depends on the SVM type: the two classification types get a
 * categorical string field named "_target" whose categories are the model
 * levels, one-classification gets an anonymous continuous double label, and
 * the two regression types get a continuous double field named "_target".
 * Every name in dimension 1 of "SV" becomes a continuous double data field;
 * the resulting features are passed through {@code scale} before they are
 * registered on the encoder.</p>
 *
 * @param encoder the encoder that receives the label, data fields and features
 */
private void encodeNonFormula(RExpEncoder encoder) {
    RGenericVector model = getObject();
    RDoubleVector typeCode = (RDoubleVector) model.getValue("type");
    RDoubleVector supportVectors = (RDoubleVector) model.getValue("SV");
    RVector<?> classLevels = (RVector<?>) model.getValue("levels");
    // The numeric type code is used as a position within the Type enum
    Type svmType = Type.values()[ValueUtil.asInt(typeCode.asScalar())];
    // Not used further down; the lookup is retained as-is
    RStringVector supportVectorRows = supportVectors.dimnames(0);
    RStringVector supportVectorColumns = supportVectors.dimnames(1);

    // Dependent variable
    {
        FieldName targetName = FieldName.create("_target");
        switch (svmType) {
            case C_CLASSIFICATION:
            case NU_CLASSIFICATION:
                {
                    RStringVector categories = (RStringVector) classLevels;
                    DataField targetField = encoder.createDataField(targetName, OpType.CATEGORICAL, DataType.STRING, categories.getValues());
                    encoder.setLabel(targetField);
                }
                break;
            case ONE_CLASSIFICATION:
                {
                    ContinuousLabel anonymousLabel = new ContinuousLabel(null, DataType.DOUBLE);
                    encoder.setLabel(anonymousLabel);
                }
                break;
            case EPS_REGRESSION:
            case NU_REGRESSION:
                {
                    DataField targetField = encoder.createDataField(targetName, OpType.CONTINUOUS, DataType.DOUBLE);
                    encoder.setLabel(targetField);
                }
                break;
        }
    }

    // Independent variables
    List<Feature> inputFeatures = new ArrayList<>();
    for (int column = 0; column < supportVectorColumns.size(); column++) {
        FieldName inputName = FieldName.create(supportVectorColumns.getValue(column));
        DataField inputField = encoder.createDataField(inputName, OpType.CONTINUOUS, DataType.DOUBLE);
        inputFeatures.add(new ContinuousFeature(encoder, inputField));
    }

    // Registration happens only after all features have gone through scale(...)
    List<Feature> scaledFeatures = scale(inputFeatures, encoder);
    for (Feature scaledFeature : scaledFeatures) {
        encoder.addFeature(scaledFeature);
    }
}
Also used : ArrayList(java.util.ArrayList) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Feature(org.jpmml.converter.Feature) OpType(org.dmg.pmml.OpType) DataType(org.dmg.pmml.DataType) ContinuousFeature(org.jpmml.converter.ContinuousFeature) DataField(org.dmg.pmml.DataField) FieldName(org.dmg.pmml.FieldName) ContinuousLabel(org.jpmml.converter.ContinuousLabel)

Example 20 with DataField

use of org.dmg.pmml.DataField in project jpmml-r by jpmml.

the class SVMConverter method encodeFormula.

/**
 * Encodes the label and the features using the formula stored in the model's
 * "terms" element.
 *
 * <p>The "response" attribute of "terms" is read as a one-based index of the
 * response field within the formula, with zero meaning that there is no
 * response. With a response, the two classification types convert the field
 * to a categorical one using the model levels, and one-classification
 * requires the field to be continuous. Without a response, only
 * one-classification is accepted and the label is an anonymous continuous
 * double label. Every name in dimension 1 of "SV" is resolved to a feature
 * through the formula; the features are passed through {@code scale} before
 * they are registered on the encoder.</p>
 *
 * @param encoder the encoder that receives the label and features
 * @throws IllegalArgumentException if "xlevels" cannot be read, if a
 *         one-classification response is not continuous, or if the response
 *         is missing for any type other than one-classification
 */
private void encodeFormula(RExpEncoder encoder) {
    RGenericVector svm = getObject();
    RDoubleVector type = (RDoubleVector) svm.getValue("type");
    RDoubleVector sv = (RDoubleVector) svm.getValue("SV");
    RVector<?> levels = (RVector<?>) svm.getValue("levels");
    RExp terms = svm.getValue("terms");
    final RGenericVector xlevels;
    try {
        xlevels = (RGenericVector) svm.getValue("xlevels");
    } catch (IllegalArgumentException iae) {
        throw new IllegalArgumentException("No variable levels information. Please initialize the \'xlevels\' element", iae);
    }
    // The numeric type code is used as a position within the Type enum
    Type svmType = Type.values()[ValueUtil.asInt(type.asScalar())];
    RStringVector rowNames = sv.dimnames(0);
    RStringVector columnNames = sv.dimnames(1);
    RIntegerVector response = (RIntegerVector) terms.getAttributeValue("response");
    FormulaContext context = new FormulaContext() {

        @Override
        public List<String> getCategories(String variable) {
            if (xlevels.hasValue(variable)) {
                RStringVector levels = (RStringVector) xlevels.getValue(variable);
                return levels.getValues();
            }
            return null;
        }

        @Override
        public RGenericVector getData() {
            return null;
        }
    };
    Formula formula = FormulaUtil.createFormula(terms, context, encoder);
    // Dependent variable
    int responseIndex = response.asScalar();
    if (responseIndex != 0) {
        // The response index is one-based, formula fields are zero-based
        DataField dataField = (DataField) formula.getField(responseIndex - 1);
        switch(svmType) {
            case C_CLASSIFICATION:
            case NU_CLASSIFICATION:
                {
                    RStringVector stringLevels = (RStringVector) levels;
                    dataField = (DataField) encoder.toCategorical(dataField.getName(), stringLevels.getValues());
                }
                break;
            case ONE_CLASSIFICATION:
                {
                    OpType opType = dataField.getOpType();
                    if (!(OpType.CONTINUOUS).equals(opType)) {
                        throw new IllegalArgumentException("Expected a continuous response field for SVM type " + svmType + ", but field '" + dataField.getName() + "' has operational type " + opType);
                    }
                }
                break;
            default:
                break;
        }
        encoder.setLabel(dataField);
    } else {
        // A formula without a response is accepted for one-classification only
        switch(svmType) {
            case ONE_CLASSIFICATION:
                break;
            default:
                throw new IllegalArgumentException("The formula has no response variable, which is not supported for SVM type " + svmType);
        }
        encoder.setLabel(new ContinuousLabel(null, DataType.DOUBLE));
    }
    List<Feature> features = new ArrayList<>();
    // Independent variables
    for (int i = 0; i < columnNames.size(); i++) {
        String columnName = columnNames.getValue(i);
        Feature feature = formula.resolveFeature(columnName);
        features.add(feature);
    }
    // Registration happens only after all features have gone through scale(...)
    features = scale(features, encoder);
    for (Feature feature : features) {
        encoder.addFeature(feature);
    }
}
Also used : ArrayList(java.util.ArrayList) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Feature(org.jpmml.converter.Feature) OpType(org.dmg.pmml.OpType) DataType(org.dmg.pmml.DataType) DataField(org.dmg.pmml.DataField) OpType(org.dmg.pmml.OpType) ContinuousLabel(org.jpmml.converter.ContinuousLabel)

Aggregations

DataField (org.dmg.pmml.DataField)26 Feature (org.jpmml.converter.Feature)13 FieldName (org.dmg.pmml.FieldName)12 ArrayList (java.util.ArrayList)9 ContinuousFeature (org.jpmml.converter.ContinuousFeature)8 CategoricalFeature (org.jpmml.converter.CategoricalFeature)5 DataType (org.dmg.pmml.DataType)4 DerivedField (org.dmg.pmml.DerivedField)4 OpType (org.dmg.pmml.OpType)4 Apply (org.dmg.pmml.Apply)3 CategoricalLabel (org.jpmml.converter.CategoricalLabel)3 ContinuousLabel (org.jpmml.converter.ContinuousLabel)3 Label (org.jpmml.converter.Label)3 Function (com.google.common.base.Function)2 MiningFunction (org.dmg.pmml.MiningFunction)2 BooleanFeature (org.jpmml.converter.BooleanFeature)2 InputField (org.jpmml.evaluator.InputField)2 OutputField (org.jpmml.evaluator.OutputField)2 TargetField (org.jpmml.evaluator.TargetField)2 Field (org.openscoring.common.Field)2