Search in sources :

Example 56 with DataField

use of org.dmg.pmml.DataField in project jpmml-r by jpmml.

the class KMeansConverter method encodeSchema.

/**
 * Declares the model inputs: one continuous, double-valued data field per
 * entry of the second dimension names ({@code dimnames(1)}) of the
 * {@code centers} element, each registered with the encoder as a feature.
 *
 * @param encoder the encoder that creates the data fields and collects the features
 */
@Override
public void encodeSchema(RExpEncoder encoder) {
    RGenericVector model = getObject();
    RDoubleVector clusterCenters = (RDoubleVector) model.getValue("centers");
    RStringVector featureNames = clusterCenters.dimnames(1);
    int index = 0;
    while (index < featureNames.size()) {
        FieldName fieldName = FieldName.create(featureNames.getValue(index));
        // Every input is declared as a continuous double
        encoder.addFeature(encoder.createDataField(fieldName, OpType.CONTINUOUS, DataType.DOUBLE));
        index++;
    }
}
Also used : DataField(org.dmg.pmml.DataField)

Example 57 with DataField

use of org.dmg.pmml.DataField in project jpmml-r by jpmml.

the class BinaryTreeConverter method encodeVariableList.

/**
 * Recursively visits a tree node and its {@code left} and {@code right}
 * children, making sure that the variable of every primary split
 * ({@code psplit}) has a data field.
 *
 * <p>A field that the encoder does not know yet is created, added to the
 * encoder as a feature, and recorded in {@code featureIndexes} under the
 * index equal to the number of entries recorded so far. An integer split
 * point yields a categorical field whose categories come from the split
 * point's {@code levels} attribute; a double split point yields a
 * continuous double field.</p>
 *
 * @param tree the node to visit
 * @param encoder the encoder that holds the data fields and features
 * @throws IllegalArgumentException if the split point is neither an integer nor a double vector
 */
private void encodeVariableList(RGenericVector tree, RExpEncoder encoder) {
    RBooleanVector terminalFlag = (RBooleanVector) tree.getValue("terminal");
    RGenericVector primarySplit = (RGenericVector) tree.getValue("psplit");
    RGenericVector leftChild = (RGenericVector) tree.getValue("left");
    RGenericVector rightChild = (RGenericVector) tree.getValue("right");
    // Only non-terminal nodes carry a split to inspect
    if (!Boolean.TRUE.equals(terminalFlag.asScalar())) {
        RNumberVector<?> splitValue = (RNumberVector<?>) primarySplit.getValue("splitpoint");
        RStringVector splitVariable = (RStringVector) primarySplit.getValue("variableName");
        FieldName fieldName = FieldName.create(splitVariable.asScalar());
        if (encoder.getDataField(fieldName) == null) {
            DataField createdField;
            if (splitValue instanceof RIntegerVector) {
                RStringVector categories = (RStringVector) splitValue.getAttributeValue("levels");
                createdField = encoder.createDataField(fieldName, OpType.CATEGORICAL, null, categories.getValues());
            } else if (splitValue instanceof RDoubleVector) {
                createdField = encoder.createDataField(fieldName, OpType.CONTINUOUS, DataType.DOUBLE);
            } else {
                throw new IllegalArgumentException();
            }
            encoder.addFeature(createdField);
            this.featureIndexes.put(fieldName, this.featureIndexes.size());
        }
        encodeVariableList(leftChild, encoder);
        encodeVariableList(rightChild, encoder);
    }
}
Also used : DataField(org.dmg.pmml.DataField) FieldName(org.dmg.pmml.FieldName)

Example 58 with DataField

use of org.dmg.pmml.DataField in project jpmml-r by jpmml.

the class EarthConverter method encodeSchema.

/**
 * Builds the schema from the {@code dirs}, {@code cuts},
 * {@code selected.terms}, {@code coefficients}, {@code terms} and
 * {@code xlevels} elements of the model object.
 *
 * <p>The label is the field named by the single second-dimension name of
 * {@code coefficients}. Every selected term except the first one is turned
 * into one feature. For each predictor of the term, the direction value
 * decides what happens: {@code 0} skips the predictor, {@code -1} and
 * {@code 1} wrap it into a hinge function derived field, and {@code 2}
 * uses the resolved feature as is. A term with several contributing
 * predictors becomes an interaction feature.</p>
 *
 * @param encoder the encoder that receives the label, derived fields and features
 * @throws IllegalArgumentException if {@code xlevels} cannot be read, if the
 *         dimension names of {@code dirs} and {@code cuts} differ, if a
 *         direction value is unsupported, or if a term references no predictor
 */
@Override
public void encodeSchema(RExpEncoder encoder) {
    RGenericVector earth = getObject();
    RDoubleVector dirs = (RDoubleVector) earth.getValue("dirs");
    RDoubleVector cuts = (RDoubleVector) earth.getValue("cuts");
    RDoubleVector selectedTerms = (RDoubleVector) earth.getValue("selected.terms");
    RDoubleVector coefficients = (RDoubleVector) earth.getValue("coefficients");
    RExp terms = earth.getValue("terms");
    final RGenericVector xlevels;
    try {
        xlevels = (RGenericVector) earth.getValue("xlevels");
    } catch (IllegalArgumentException iae) {
        throw new IllegalArgumentException("No variable levels information. Please initialize the \'xlevels\' element", iae);
    }
    RStringVector dirsRows = dirs.dimnames(0);
    RStringVector dirsColumns = dirs.dimnames(1);
    RStringVector cutsRows = cuts.dimnames(0);
    RStringVector cutsColumns = cuts.dimnames(1);
    // Both matrices are read with the same row/column indexes below, so their layouts must agree
    if (!(dirsRows.getValues()).equals(cutsRows.getValues()) || !(dirsColumns.getValues()).equals(cutsColumns.getValues())) {
        throw new IllegalArgumentException("The dimension names of the \'dirs\' and \'cuts\' elements do not match");
    }
    int rows = dirsRows.size();
    int columns = dirsColumns.size();
    List<String> predictorNames = dirsColumns.getValues();
    FormulaContext context = new FormulaContext() {

        @Override
        public List<String> getCategories(String variable) {
            if (xlevels.hasValue(variable)) {
                RStringVector levels = (RStringVector) xlevels.getValue(variable);
                return levels.getValues();
            }
            return null;
        }

        @Override
        public RGenericVector getData() {
            return null;
        }
    };
    Formula formula = FormulaUtil.createFormula(terms, context, encoder);
    // Dependent variable
    {
        RStringVector yNames = coefficients.dimnames(1);
        FieldName name = FieldName.create(yNames.asScalar());
        DataField dataField = (DataField) encoder.getField(name);
        encoder.setLabel(dataField);
    }
    // Independent variables
    // NOTE(review): the first selected term is skipped - presumably the intercept; confirm
    for (int i = 1; i < selectedTerms.size(); i++) {
        // Selected term values are one-based row indexes into 'dirs' and 'cuts'
        int termIndex = ValueUtil.asInt(selectedTerms.getValue(i)) - 1;
        List<Double> dirsRow = FortranMatrixUtil.getRow(dirs.getValues(), rows, columns, termIndex);
        List<Double> cutsRow = FortranMatrixUtil.getRow(cuts.getValues(), rows, columns, termIndex);
        List<Feature> features = new ArrayList<>();
        predictors: for (int j = 0; j < predictorNames.size(); j++) {
            String predictorName = predictorNames.get(j);
            int dir = ValueUtil.asInt(dirsRow.get(j));
            double cut = cutsRow.get(j);
            if (dir == 0) {
                continue predictors;
            }
            Feature feature = formula.resolveFeature(predictorName);
            switch(dir) {
                case -1:
                case 1:
                    {
                        feature = feature.toContinuousFeature();
                        FieldName name = FieldName.create(formatHingeFunction(dir, feature, cut));
                        // Reuse the derived field if an identically named hinge function already exists
                        DerivedField derivedField = encoder.getDerivedField(name);
                        if (derivedField == null) {
                            Apply apply = createHingeFunction(dir, feature, cut);
                            derivedField = encoder.createDerivedField(name, OpType.CONTINUOUS, DataType.DOUBLE, apply);
                        }
                        feature = new ContinuousFeature(encoder, derivedField);
                    }
                    break;
                case 2:
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported direction value " + dir + " for predictor " + predictorName);
            }
            features.add(feature);
        }
        Feature feature;
        if (features.size() == 1) {
            feature = features.get(0);
        } else if (features.size() > 1) {
            // Name the interaction after the row that supplied its data (termIndex), not after the loop counter
            feature = new InteractionFeature(encoder, FieldName.create(dirsRows.getValue(termIndex)), DataType.DOUBLE, features);
        } else {
            throw new IllegalArgumentException("Term " + dirsRows.getValue(termIndex) + " does not reference any predictor");
        }
        encoder.addFeature(feature);
    }
}
Also used : InteractionFeature(org.jpmml.converter.InteractionFeature) Apply(org.dmg.pmml.Apply) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) DataField(org.dmg.pmml.DataField) FieldName(org.dmg.pmml.FieldName) DerivedField(org.dmg.pmml.DerivedField)

Example 59 with DataField

use of org.dmg.pmml.DataField in project jpmml-r by jpmml.

the class SVMConverter method encodeNonFormula.

/**
 * Builds the schema for a model that has no formula, using the
 * {@code type}, {@code SV} and {@code levels} elements of the model object.
 *
 * <p>The label is a categorical string field named {@code _target} for the
 * two classification types, a continuous double field named {@code _target}
 * for the two regression types, and an anonymous continuous label for
 * one-classification. One continuous double field is created per
 * second-dimension name of {@code SV}; the resulting features are passed
 * through {@code scale} before being added to the encoder.</p>
 *
 * @param encoder the encoder that receives the label, data fields and features
 */
private void encodeNonFormula(RExpEncoder encoder) {
    RGenericVector model = getObject();
    RDoubleVector typeCode = (RDoubleVector) model.getValue("type");
    RDoubleVector supportVectors = (RDoubleVector) model.getValue("SV");
    RVector<?> levels = (RVector<?>) model.getValue("levels");
    // The numeric type code is used as an index into the Type constants
    Type[] svmTypes = Type.values();
    Type svmType = svmTypes[ValueUtil.asInt(typeCode.asScalar())];
    // Not used below; the lookup is retained as in the original code
    RStringVector rowNames = supportVectors.dimnames(0);
    RStringVector columnNames = supportVectors.dimnames(1);

    // Dependent variable
    FieldName targetName = FieldName.create("_target");
    if (svmType == Type.C_CLASSIFICATION || svmType == Type.NU_CLASSIFICATION) {
        RStringVector categories = (RStringVector) levels;
        encoder.setLabel(encoder.createDataField(targetName, OpType.CATEGORICAL, DataType.STRING, categories.getValues()));
    } else if (svmType == Type.ONE_CLASSIFICATION) {
        encoder.setLabel(new ContinuousLabel(null, DataType.DOUBLE));
    } else if (svmType == Type.EPS_REGRESSION || svmType == Type.NU_REGRESSION) {
        encoder.setLabel(encoder.createDataField(targetName, OpType.CONTINUOUS, DataType.DOUBLE));
    }

    // Independent variables
    List<Feature> rawFeatures = new ArrayList<>();
    for (int column = 0; column < columnNames.size(); column++) {
        FieldName fieldName = FieldName.create(columnNames.getValue(column));
        DataField inputField = encoder.createDataField(fieldName, OpType.CONTINUOUS, DataType.DOUBLE);
        rawFeatures.add(new ContinuousFeature(encoder, inputField));
    }
    List<Feature> scaledFeatures = scale(rawFeatures, encoder);
    for (Feature scaledFeature : scaledFeatures) {
        encoder.addFeature(scaledFeature);
    }
}
Also used : ArrayList(java.util.ArrayList) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Feature(org.jpmml.converter.Feature) OpType(org.dmg.pmml.OpType) DataType(org.dmg.pmml.DataType) DataField(org.dmg.pmml.DataField) FieldName(org.dmg.pmml.FieldName) ContinuousLabel(org.jpmml.converter.ContinuousLabel)

Example 60 with DataField

use of org.dmg.pmml.DataField in project jpmml-r by jpmml.

the class SVMConverter method encodeFormula.

/**
 * Builds the schema for a model that was fitted with a formula, using the
 * {@code type}, {@code SV}, {@code levels}, {@code terms} and
 * {@code xlevels} elements of the model object.
 *
 * <p>When the {@code response} attribute of {@code terms} is non-zero, the
 * label is the formula field at position {@code response - 1}; for the two
 * classification types it is first converted to a categorical field using
 * {@code levels}, and for one-classification it must be continuous. When
 * the attribute is zero, only one-classification is accepted and an
 * anonymous continuous label is used. Every second-dimension name of
 * {@code SV} is resolved to a feature through the formula; the features are
 * passed through {@code scale} before being added to the encoder.</p>
 *
 * @param encoder the encoder that receives the label and features
 * @throws IllegalArgumentException if {@code xlevels} cannot be read, if a
 *         one-classification response field is not continuous, or if there
 *         is no response and the type is not one-classification
 */
private void encodeFormula(RExpEncoder encoder) {
    RGenericVector model = getObject();
    RDoubleVector typeCode = (RDoubleVector) model.getValue("type");
    RDoubleVector supportVectors = (RDoubleVector) model.getValue("SV");
    RVector<?> levels = (RVector<?>) model.getValue("levels");
    RExp terms = model.getValue("terms");
    final RGenericVector xlevels;
    try {
        xlevels = (RGenericVector) model.getValue("xlevels");
    } catch (IllegalArgumentException iae) {
        throw new IllegalArgumentException("No variable levels information. Please initialize the \'xlevels\' element", iae);
    }
    // The numeric type code is used as an index into the Type constants
    Type[] svmTypes = Type.values();
    Type svmType = svmTypes[ValueUtil.asInt(typeCode.asScalar())];
    // Not used below; the lookup is retained as in the original code
    RStringVector rowNames = supportVectors.dimnames(0);
    RStringVector columnNames = supportVectors.dimnames(1);
    RIntegerVector response = (RIntegerVector) terms.getAttributeValue("response");
    FormulaContext context = new FormulaContext() {

        @Override
        public List<String> getCategories(String variable) {
            // Variables without an 'xlevels' entry have no categories
            if (!xlevels.hasValue(variable)) {
                return null;
            }
            RStringVector categories = (RStringVector) xlevels.getValue(variable);
            return categories.getValues();
        }

        @Override
        public RGenericVector getData() {
            return null;
        }
    };
    Formula formula = FormulaUtil.createFormula(terms, context, encoder);

    // Dependent variable
    int responseIndex = response.asScalar();
    if (responseIndex == 0) {
        // Without a response only one-classification is accepted
        if (svmType != Type.ONE_CLASSIFICATION) {
            throw new IllegalArgumentException();
        }
        encoder.setLabel(new ContinuousLabel(null, DataType.DOUBLE));
    } else {
        DataField labelField = (DataField) formula.getField(responseIndex - 1);
        if (svmType == Type.C_CLASSIFICATION || svmType == Type.NU_CLASSIFICATION) {
            RStringVector categories = (RStringVector) levels;
            labelField = (DataField) encoder.toCategorical(labelField.getName(), categories.getValues());
        } else if (svmType == Type.ONE_CLASSIFICATION) {
            if (!OpType.CONTINUOUS.equals(labelField.getOpType())) {
                throw new IllegalArgumentException();
            }
        }
        encoder.setLabel(labelField);
    }

    // Independent variables
    List<Feature> rawFeatures = new ArrayList<>();
    for (int column = 0; column < columnNames.size(); column++) {
        rawFeatures.add(formula.resolveFeature(columnNames.getValue(column)));
    }
    List<Feature> scaledFeatures = scale(rawFeatures, encoder);
    for (Feature scaledFeature : scaledFeatures) {
        encoder.addFeature(scaledFeature);
    }
}
Also used : ArrayList(java.util.ArrayList) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Feature(org.jpmml.converter.Feature) OpType(org.dmg.pmml.OpType) DataType(org.dmg.pmml.DataType) DataField(org.dmg.pmml.DataField) ContinuousLabel(org.jpmml.converter.ContinuousLabel)

Aggregations

DataField (org.dmg.pmml.DataField): 101, Test (org.junit.Test): 51, DataDictionary (org.dmg.pmml.DataDictionary): 42, MiningField (org.dmg.pmml.MiningField): 42, MiningSchema (org.dmg.pmml.MiningSchema): 30, PMMLModelTestUtils.getRandomDataField (org.kie.pmml.compiler.api.testutils.PMMLModelTestUtils.getRandomDataField): 28, RegressionModel (org.dmg.pmml.regression.RegressionModel): 27, CommonTestingUtils.getFieldsFromDataDictionary (org.kie.pmml.compiler.api.CommonTestingUtils.getFieldsFromDataDictionary): 27, FieldName (org.dmg.pmml.FieldName): 24, Model (org.dmg.pmml.Model): 24, PMMLModelTestUtils.getDataField (org.kie.pmml.compiler.api.testutils.PMMLModelTestUtils.getDataField): 22, DataType (org.dmg.pmml.DataType): 19, OutputField (org.dmg.pmml.OutputField): 19, PMMLModelTestUtils.getRandomMiningField (org.kie.pmml.compiler.api.testutils.PMMLModelTestUtils.getRandomMiningField): 19, PMMLModelTestUtils.getMiningField (org.kie.pmml.compiler.api.testutils.PMMLModelTestUtils.getMiningField): 18, ArrayList (java.util.ArrayList): 17, List (java.util.List): 17, PMML (org.dmg.pmml.PMML): 17, Collectors (java.util.stream.Collectors): 16, OpType (org.dmg.pmml.OpType): 15