Search in sources:

Example 41 with DerivedField

use of org.dmg.pmml.DerivedField in project shifu by ShifuML.

In the class NeuralNetworkModelIntegrator, method getNeuralInputs:

/**
 * Builds the {@link NeuralInputs} of the given network from its local transformations.
 *
 * <p>Every derived field whose expression is a {@code NormContinuous}, {@code MapValues} or
 * {@code Discretize} is linked to the field that expression reads. A derived field then becomes
 * a neural input when no other derived field reads from it and {@code isRootInMiningList}
 * accepts the field that {@code getRoot} returns for it. Accepted fields receive the ids
 * {@code "0,0"}, {@code "0,1"}, ... in iteration order; one extra input named after
 * {@code PluginConstants.biasValue} is appended last.
 *
 * @param model network whose local transformations and mining schema are read
 * @return the populated neural inputs
 */
private NeuralInputs getNeuralInputs(final NeuralNetwork model) {
    NeuralInputs nnInputs = new NeuralInputs();
    // derived field name -> name of the field its expression reads
    HashMap<FieldName, FieldName> sourceByDerived = new HashMap<FieldName, FieldName>();
    // field name -> names of the derived fields that read it
    HashMap<FieldName, List<FieldName>> derivedBySource = new HashMap<FieldName, List<FieldName>>();
    for (DerivedField derived : model.getLocalTransformations().getDerivedFields()) {
        FieldName derivedName = derived.getName();
        FieldName source = null;
        boolean recognised = true;
        if (derived.getExpression() instanceof NormContinuous) {
            // z-scale normalization on a numerical variable
            source = ((NormContinuous) derived.getExpression()).getField();
        } else if (derived.getExpression() instanceof MapValues) {
            // bin map on a categorical variable; only the first field/column pair is used
            source = ((MapValues) derived.getExpression()).getFieldColumnPairs().get(0).getField();
        } else if (derived.getExpression() instanceof Discretize) {
            source = ((Discretize) derived.getExpression()).getField();
        } else {
            recognised = false;
        }
        if (recognised) {
            sourceByDerived.put(derivedName, source);
        }
        // Fields with an unrecognised expression are all grouped under the null key.
        List<FieldName> readers = derivedBySource.get(source);
        if (readers == null) {
            readers = new ArrayList<FieldName>();
            derivedBySource.put(source, readers);
        }
        readers.add(derivedName);
    }
    // Keep only the derived fields that nothing else reads and whose root is a mining field.
    List<MiningField> miningFields = model.getMiningSchema().getMiningFields();
    int nextIndex = 0;
    for (DerivedField derived : model.getLocalTransformations().getDerivedFields()) {
        FieldName derivedName = derived.getName();
        List<FieldName> readers = derivedBySource.get(derivedName);
        boolean hasReaders = (readers != null && !readers.isEmpty());
        FieldName root = getRoot(derivedName, sourceByDerived);
        if (hasReaders || !isRootInMiningList(root, miningFields)) {
            continue;
        }
        DerivedField inputField = new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE).setName(derivedName).setExpression(new FieldRef(derivedName));
        nnInputs.addNeuralInputs(new NeuralInput("0," + nextIndex, inputField));
        nextIndex++;
    }
    // The bias input is always appended last.
    FieldName biasName = new FieldName(PluginConstants.biasValue);
    FieldRef biasRef = new FieldRef(new FieldName(PluginConstants.biasValue));
    DerivedField biasField = new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE).setName(biasName).setExpression(biasRef);
    nnInputs.addNeuralInputs(new NeuralInput(PluginConstants.biasValue, biasField));
    return nnInputs;
}
Also used : NormContinuous(org.dmg.pmml.NormContinuous) MiningField(org.dmg.pmml.MiningField) FieldRef(org.dmg.pmml.FieldRef) NeuralInputs(org.dmg.pmml.neural_network.NeuralInputs) HashMap(java.util.HashMap) MapValues(org.dmg.pmml.MapValues) Discretize(org.dmg.pmml.Discretize) ArrayList(java.util.ArrayList) List(java.util.List) FieldName(org.dmg.pmml.FieldName) DerivedField(org.dmg.pmml.DerivedField) NeuralInput(org.dmg.pmml.neural_network.NeuralInput)

Example 42 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-r by jpmml.

In the class FormulaUtil, method createFormula:

/**
 * Creates a {@link Formula} from the {@code "factors"} attribute of an R terms object.
 *
 * <p>Each row name of the factors matrix is treated as one variable. Its data type comes from
 * the optional {@code "dataClasses"} attribute, or otherwise from the data that the context
 * holds for the variable. A variable that looks like an expression (it contains both
 * parentheses, or a space) is translated and, when the resulting function is one of
 * {@code base::cut}, {@code base::I}, {@code base::ifelse}, {@code plyr::mapvalues} or
 * {@code plyr::revalue}, registered as a derived field; every other variable is registered as
 * a data field. Finally, a data field is created for each name collected in the expression
 * field map that the encoder does not know yet.
 *
 * @param terms R terms object providing the {@code "factors"} and {@code "dataClasses"} attributes
 * @param context source of per-variable data and category levels
 * @param encoder encoder in which the data fields and derived fields are created
 * @return the formula holding one field per variable
 * @throws IllegalArgumentException if the {@code "dataClasses"} attribute is absent and the
 *         context holds no data for a variable
 */
public static Formula createFormula(RExp terms, FormulaContext context, RExpEncoder encoder) {
    Formula formula = new Formula(encoder);
    RIntegerVector factors = terms.getIntegerAttribute("factors");
    RStringVector dataClasses = terms.getStringAttribute("dataClasses", false);
    RStringVector variableRows = factors.dimnames(0);
    // NOTE(review): termColumns is not used below; the call is kept in case dimnames(1) validates the input.
    RStringVector termColumns = factors.dimnames(1);
    // Filled by the encode* helpers below and consumed by the second loop.
    VariableMap expressionFields = new VariableMap();
    for (int i = 0; i < variableRows.size(); i++) {
        String variable = variableRows.getDequotedValue(i);
        String name = variable;
        OpType opType = OpType.CONTINUOUS;
        DataType dataType;
        if (dataClasses != null) {
            dataType = RExpUtil.getDataType(dataClasses.getElement(variable));
        } else {
            RVector<?> data = context.getData(name);
            if (data != null) {
                dataType = data.getDataType();
            } else {
                throw new IllegalArgumentException("Cannot determine the data type of variable '" + variable + "': the terms have no \"dataClasses\" attribute and the context holds no data for it");
            }
        }
        List<String> categories = context.getCategories(variable);
        if (categories != null && categories.size() > 0) {
            opType = OpType.CATEGORICAL;
        }
        Expression expression = null;
        String shortName = name;
        // Labeled block: "break expression" abandons the derived-field path and falls through
        // to the plain data-field handling below.
        expression: if ((variable.indexOf('(') > -1 && variable.indexOf(')') > -1) || (variable.indexOf(' ') > -1)) {
            try {
                expression = ExpressionTranslator.translateExpression(variable);
            } catch (Exception ignored) {
                // Deliberate best-effort: a variable that cannot be translated is handled as
                // an ordinary data field (expression is still null at this point).
                break expression;
            }
            FunctionExpression functionExpression;
            if (expression instanceof FunctionExpression) {
                functionExpression = (FunctionExpression) expression;
            } else {
                // Wrap a non-function expression as base::I(x), keeping the original text as
                // the formatted form of the argument.
                FunctionExpression.Argument xArgument = new FunctionExpression.Argument("x", expression) {

                    @Override
                    public String formatExpression() {
                        return variable;
                    }
                };
                functionExpression = new FunctionExpression("base", "I", Collections.singletonList(xArgument));
            }
            if (functionExpression.hasId("base", "cut")) {
                expression = encodeCutExpression(functionExpression, categories, expressionFields, encoder);
            } else if (functionExpression.hasId("base", "I")) {
                expression = encodeIdentityExpression(functionExpression, expressionFields, encoder);
            } else if (functionExpression.hasId("base", "ifelse")) {
                expression = encodeIfElseExpression(functionExpression, expressionFields, encoder);
            } else if (functionExpression.hasId("plyr", "mapvalues")) {
                expression = encodeMapValuesExpression(functionExpression, categories, expressionFields, encoder);
            } else if (functionExpression.hasId("plyr", "revalue")) {
                expression = encodeReValueExpression(functionExpression, categories, expressionFields, encoder);
            } else {
                // Unsupported function: discard the translated expression.
                expression = null;
                break expression;
            }
            FunctionExpression.Argument xArgument = functionExpression.getArgument("x", 0);
            String value = (xArgument.formatExpression()).trim();
            shortName = (functionExpression.hasId("base", "I") ? value : (functionExpression.getFunction() + "(" + value + ")"));
        }
        List<String> categoryNames;
        List<?> categoryValues;
        if (dataType == DataType.BOOLEAN) {
            // Booleans are always categorical with the two fixed levels.
            opType = OpType.CATEGORICAL;
            categoryNames = Arrays.asList("FALSE", "TRUE");
            categoryValues = Arrays.asList(Boolean.FALSE, Boolean.TRUE);
        } else {
            categoryNames = categories;
            categoryValues = categories;
        }
        if (expression != null) {
            DerivedField derivedField = encoder.createDerivedField(name, opType, dataType, expression).addExtensions(PMMLUtil.createExtension("variable", (Object) variable));
            if (categoryNames != null && categoryNames.size() > 0) {
                formula.addField(derivedField, categoryNames, categoryValues);
            } else {
                formula.addField(derivedField);
            }
            if (!(name).equals(shortName)) {
                encoder.renameField(name, shortName);
            }
        } else {
            if (categoryNames != null && categoryNames.size() > 0) {
                DataField dataField = encoder.createDataField(name, OpType.CATEGORICAL, dataType, categories);
                formula.addField(dataField, categoryNames, categoryValues);
            } else {
                DataField dataField = encoder.createDataField(name, OpType.CONTINUOUS, dataType);
                formula.addField(dataField);
            }
        }
    }
    // Make sure every name collected in expressionFields has a data field in the encoder.
    Collection<Map.Entry<String, List<String>>> entries = expressionFields.entrySet();
    for (Map.Entry<String, List<String>> entry : entries) {
        String name = entry.getKey();
        List<String> categories = entry.getValue();
        DataField dataField = encoder.getDataField(name);
        if (dataField == null) {
            OpType opType = OpType.CONTINUOUS;
            DataType dataType = DataType.DOUBLE;
            if (categories != null && categories.size() > 0) {
                opType = OpType.CATEGORICAL;
            }
            RVector<?> data = context.getData(name);
            if (data != null) {
                dataType = data.getDataType();
            }
            encoder.createDataField(name, opType, dataType, categories);
        }
    }
    return formula;
}
Also used : DataType(org.dmg.pmml.DataType) ArrayList(java.util.ArrayList) List(java.util.List) DataField(org.dmg.pmml.DataField) Expression(org.dmg.pmml.Expression) OpType(org.dmg.pmml.OpType) DerivedField(org.dmg.pmml.DerivedField) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 43 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-r by jpmml.

In the class PreProcessEncoder, method addFeature:

/**
 * Adds a feature, first replacing it by a continuous feature over a new {@code "preProcess"}
 * derived field when a data field exists for the feature's name and {@code encodeExpression}
 * returns an expression that is not equal to the feature's own reference.
 *
 * @param feature the feature to add
 */
@Override
public void addFeature(Feature feature) {
    String name = FeatureUtil.getName(feature);
    if (getDataField(name) != null) {
        Expression reference = feature.ref();
        Expression encoded = encodeExpression(name, reference);
        boolean changed = !(reference).equals(encoded);
        if (changed) {
            DerivedField preProcessed = createDerivedField(FieldNameUtil.create("preProcess", feature), OpType.CONTINUOUS, DataType.DOUBLE, encoded);
            feature = new ContinuousFeature(PreProcessEncoder.this, preProcessed);
        }
    }
    super.addFeature(feature);
}
Also used : ContinuousFeature(org.jpmml.converter.ContinuousFeature) DataField(org.dmg.pmml.DataField) Expression(org.dmg.pmml.Expression) DerivedField(org.dmg.pmml.DerivedField)

Example 44 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-r by jpmml.

In the class RExpEncoder, method addFields:

/**
 * Copies fields from another encoder into this one.
 *
 * <p>A data field (respectively derived field) of {@code encoder} is added only when this
 * encoder's own lookup by the same name returns {@code null}.
 *
 * @param encoder the encoder whose data fields and derived fields are read
 */
public void addFields(RExpEncoder encoder) {
    Map<String, DataField> otherDataFields = encoder.getDataFields();
    Map<String, DerivedField> otherDerivedFields = encoder.getDerivedFields();
    for (Map.Entry<String, DataField> entry : otherDataFields.entrySet()) {
        if (getDataField(entry.getKey()) != null) {
            continue;
        }
        addDataField(entry.getValue());
    }
    for (Map.Entry<String, DerivedField> entry : otherDerivedFields.entrySet()) {
        if (getDerivedField(entry.getKey()) != null) {
            continue;
        }
        addDerivedField(entry.getValue());
    }
}
Also used : DataField(org.dmg.pmml.DataField) DerivedField(org.dmg.pmml.DerivedField)

Example 45 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-r by jpmml.

In the class SVMConverter, method scaleFeatures:

/**
 * Replaces each scaled feature of the encoder by a derived field that applies the centering
 * and scaling stored in the SVM object.
 *
 * <p>For every SV column flagged in {@code "scaled"}, the feature at the same index becomes
 * {@code (x - center) / scale}, built from the {@code "scaled:center"} and
 * {@code "scaled:scale"} elements of {@code "x.scale"}. The subtraction is omitted when the
 * center is zero, the division when the scale is one, and the feature is left untouched when
 * both hold. Replacements are written into the list returned by {@code encoder.getFeatures()}.
 *
 * @param encoder encoder whose features are rewritten and in which derived fields are created
 * @throws IllegalArgumentException if the number of {@code "scaled"} flags differs from the
 *         number of SV columns or from the number of encoder features
 */
private void scaleFeatures(RExpEncoder encoder) {
    RGenericVector svm = getObject();
    RDoubleVector sv = svm.getDoubleElement("SV");
    RBooleanVector scaled = svm.getBooleanElement("scaled");
    RGenericVector xScale = svm.getGenericElement("x.scale");
    // NOTE(review): rowNames is not used below; the call is kept in case dimnames(0) validates the input.
    RStringVector rowNames = sv.dimnames(0);
    RStringVector columnNames = sv.dimnames(1);
    List<Feature> features = encoder.getFeatures();
    if ((scaled.size() != columnNames.size()) || (scaled.size() != features.size())) {
        throw new IllegalArgumentException("Expected the number of SV columns (" + columnNames.size() + ") and the number of features (" + features.size() + ") to equal the number of \"scaled\" flags (" + scaled.size() + ")");
    }
    RDoubleVector xScaledCenter = xScale.getDoubleElement("scaled:center");
    RDoubleVector xScaledScale = xScale.getDoubleElement("scaled:scale");
    for (int i = 0; i < columnNames.size(); i++) {
        String columnName = columnNames.getValue(i);
        if (!scaled.getValue(i)) {
            continue;
        }
        Feature feature = features.get(i);
        Double center = xScaledCenter.getElement(columnName);
        Double scale = xScaledScale.getElement(columnName);
        // Identity transformation: nothing to encode for this column.
        if (ValueUtil.isZero(center) && ValueUtil.isOne(scale)) {
            continue;
        }
        ContinuousFeature continuousFeature = feature.toContinuousFeature();
        Expression expression = continuousFeature.ref();
        if (!ValueUtil.isZero(center)) {
            expression = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, expression, PMMLUtil.createConstant(center));
        }
        if (!ValueUtil.isOne(scale)) {
            expression = PMMLUtil.createApply(PMMLFunctions.DIVIDE, expression, PMMLUtil.createConstant(scale));
        }
        DerivedField derivedField = encoder.createDerivedField(FieldNameUtil.create("scale", continuousFeature), OpType.CONTINUOUS, DataType.DOUBLE, expression);
        features.set(i, new ContinuousFeature(encoder, derivedField));
    }
}
Also used : ContinuousFeature(org.jpmml.converter.ContinuousFeature) Feature(org.jpmml.converter.Feature) Expression(org.dmg.pmml.Expression) DerivedField(org.dmg.pmml.DerivedField)

Aggregations

DerivedField (org.dmg.pmml.DerivedField): 48, ArrayList (java.util.ArrayList): 17, ContinuousFeature (org.jpmml.converter.ContinuousFeature): 16, Feature (org.jpmml.converter.Feature): 16, Apply (org.dmg.pmml.Apply): 10, Expression (org.dmg.pmml.Expression): 10, FieldName (org.dmg.pmml.FieldName): 9, Test (org.junit.Test): 8, KiePMMLDerivedField (org.kie.pmml.commons.transformations.KiePMMLDerivedField): 8, Constant (org.dmg.pmml.Constant): 7, DataField (org.dmg.pmml.DataField): 7, NormContinuous (org.dmg.pmml.NormContinuous): 6, BlockStmt (com.github.javaparser.ast.stmt.BlockStmt): 5, List (java.util.List): 5, CategoricalFeature (org.jpmml.converter.CategoricalFeature): 5, Discretize (org.dmg.pmml.Discretize): 4, FieldRef (org.dmg.pmml.FieldRef): 4, MapValues (org.dmg.pmml.MapValues): 4, Statement (com.github.javaparser.ast.stmt.Statement): 3, HashMap (java.util.HashMap): 3