Search in sources :

Example 1 with Discretize

use of org.dmg.pmml.Discretize in project jpmml-r by jpmml.

the class FormulaUtil method createDiscretize.

private static Discretize createDiscretize(FieldName name, List<String> categories) {
    Discretize discretize = new Discretize(name);
    for (String category : categories) {
        Interval interval = ExpressionTranslator.translateInterval(category);
        DiscretizeBin discretizeBin = new DiscretizeBin(category, interval);
        discretize.addDiscretizeBins(discretizeBin);
    }
    return discretize;
}
Also used : Discretize(org.dmg.pmml.Discretize) DiscretizeBin(org.dmg.pmml.DiscretizeBin) Interval(org.dmg.pmml.Interval)

Example 2 with Discretize

use of org.dmg.pmml.Discretize in project jpmml-sparkml by jpmml.

the class BucketizerConverter method encodeFeatures.

@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Bucketizer transformer = getTransformer();
    Feature feature = encoder.getOnlyFeature(transformer.getInputCol());
    ContinuousFeature continuousFeature = feature.toContinuousFeature();
    Discretize discretize = new Discretize(continuousFeature.getName());
    List<String> categories = new ArrayList<>();
    double[] splits = transformer.getSplits();
    for (int i = 0; i < (splits.length - 1); i++) {
        String category = String.valueOf(i);
        categories.add(category);
        Interval interval = new Interval((i < (splits.length - 2)) ? Interval.Closure.CLOSED_OPEN : Interval.Closure.CLOSED_CLOSED).setLeftMargin(formatMargin(splits[i])).setRightMargin(formatMargin(splits[i + 1]));
        DiscretizeBin discretizeBin = new DiscretizeBin(category, interval);
        discretize.addDiscretizeBins(discretizeBin);
    }
    DerivedField derivedField = encoder.createDerivedField(formatName(transformer), OpType.CATEGORICAL, DataType.INTEGER, discretize);
    return Collections.<Feature>singletonList(new CategoricalFeature(encoder, derivedField, categories));
}
Also used : ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Discretize(org.dmg.pmml.Discretize) Bucketizer(org.apache.spark.ml.feature.Bucketizer) DiscretizeBin(org.dmg.pmml.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedField) Interval(org.dmg.pmml.Interval)

Example 3 with Discretize

use of org.dmg.pmml.Discretize in project shifu by ShifuML.

the class PMMLLRModelBuilder method adaptMLModelToPMML.

public RegressionModel adaptMLModelToPMML(ml.shifu.shifu.core.LR lr, RegressionModel pmmlModel) {
    pmmlModel.setNormalizationMethod(NormalizationMethod.LOGIT);
    pmmlModel.setMiningFunction(MiningFunction.REGRESSION);
    RegressionTable table = new RegressionTable();
    table.setIntercept(lr.getBias());
    LocalTransformations lt = pmmlModel.getLocalTransformations();
    List<DerivedField> df = lt.getDerivedFields();
    HashMap<FieldName, FieldName> miningTransformMap = new HashMap<FieldName, FieldName>();
    for (DerivedField dField : df) {
        // Apply z-scale normalization on numerical variables
        if (dField.getExpression() instanceof NormContinuous) {
            miningTransformMap.put(((NormContinuous) dField.getExpression()).getField(), dField.getName());
        } else // Apply bin map on categorical variables
        if (dField.getExpression() instanceof MapValues) {
            miningTransformMap.put(((MapValues) dField.getExpression()).getFieldColumnPairs().get(0).getField(), dField.getName());
        } else if (dField.getExpression() instanceof Discretize) {
            miningTransformMap.put(((Discretize) dField.getExpression()).getField(), dField.getName());
        }
    }
    List<MiningField> miningList = pmmlModel.getMiningSchema().getMiningFields();
    int index = 0;
    for (int i = 0; i < miningList.size(); i++) {
        MiningField mField = miningList.get(i);
        if (mField.getUsageType() != UsageType.ACTIVE)
            continue;
        FieldName mFieldName = mField.getName();
        FieldName fName = mFieldName;
        while (miningTransformMap.containsKey(fName)) {
            fName = miningTransformMap.get(fName);
        }
        NumericPredictor np = new NumericPredictor();
        np.setName(fName);
        np.setCoefficient(lr.getWeights()[index++]);
        table.addNumericPredictors(np);
    }
    pmmlModel.addRegressionTables(table);
    return pmmlModel;
}
Also used : NormContinuous(org.dmg.pmml.NormContinuous) MiningField(org.dmg.pmml.MiningField) HashMap(java.util.HashMap) NumericPredictor(org.dmg.pmml.regression.NumericPredictor) RegressionTable(org.dmg.pmml.regression.RegressionTable) LocalTransformations(org.dmg.pmml.LocalTransformations) MapValues(org.dmg.pmml.MapValues) Discretize(org.dmg.pmml.Discretize) DerivedField(org.dmg.pmml.DerivedField) FieldName(org.dmg.pmml.FieldName)

Example 4 with Discretize

use of org.dmg.pmml.Discretize in project shifu by ShifuML.

the class WoeLocalTransformCreator method createNumericalDerivedField.

/**
 * Create @DerivedField for numerical variable
 *
 * @param config
 *            - ColumnConfig for numerical variable
 * @param cutoff
 *            - cutoff of normalization
 * @param normType
 *            - the normalization method that is used to generate DerivedField
 * @return DerivedField for variable
 */
@Override
protected List<DerivedField> createNumericalDerivedField(ColumnConfig config, double cutoff, ModelNormalizeConf.NormType normType) {
    List<Double> binWoeList = (normType.equals(ModelNormalizeConf.NormType.WOE) ? config.getBinCountWoe() : config.getBinWeightedWoe());
    List<Double> binBoundaryList = config.getBinBoundary();
    List<DiscretizeBin> discretizeBinList = new ArrayList<DiscretizeBin>();
    for (int i = 0; i < binBoundaryList.size(); i++) {
        DiscretizeBin discretizeBin = new DiscretizeBin();
        Interval interval = new Interval();
        if (i == 0) {
            if (binBoundaryList.size() == 1) {
                interval.setClosure(Interval.Closure.OPEN_OPEN).setLeftMargin(Double.NEGATIVE_INFINITY).setRightMargin(Double.POSITIVE_INFINITY);
            } else {
                interval.setClosure(Interval.Closure.OPEN_OPEN).setRightMargin(binBoundaryList.get(i + 1));
            }
        } else if (i == binBoundaryList.size() - 1) {
            interval.setClosure(Interval.Closure.CLOSED_OPEN).setLeftMargin(binBoundaryList.get(i));
        } else {
            interval.setClosure(Interval.Closure.CLOSED_OPEN).setLeftMargin(binBoundaryList.get(i)).setRightMargin(binBoundaryList.get(i + 1));
        }
        discretizeBin.setInterval(interval).setBinValue(Double.toString(binWoeList.get(i)));
        discretizeBinList.add(discretizeBin);
    }
    Discretize discretize = new Discretize();
    discretize.setDataType(DataType.DOUBLE).setField(FieldName.create(NormalUtils.getSimpleColumnName(config, columnConfigList, segmentExpansions, datasetHeaders))).setMapMissingTo(Normalizer.normalize(config, null, cutoff, normType).get(0).toString()).setDefaultValue(Normalizer.normalize(config, null, cutoff, normType).get(0).toString()).addDiscretizeBins(discretizeBinList.toArray(new DiscretizeBin[discretizeBinList.size()]));
    // derived field name is consisted of FieldName and "_zscl"
    List<DerivedField> derivedFields = new ArrayList<DerivedField>();
    derivedFields.add(new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE).setName(FieldName.create(genPmmlColumnName(NormalUtils.getSimpleColumnName(config.getColumnName()), normType))).setExpression(discretize));
    return derivedFields;
}
Also used : ArrayList(java.util.ArrayList) Discretize(org.dmg.pmml.Discretize) DiscretizeBin(org.dmg.pmml.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedField) Interval(org.dmg.pmml.Interval)

Example 5 with Discretize

use of org.dmg.pmml.Discretize in project shifu by ShifuML.

the class NeuralNetworkModelIntegrator method getNeuralInputs.

private NeuralInputs getNeuralInputs(final NeuralNetwork model) {
    NeuralInputs nnInputs = new NeuralInputs();
    // get HashMap for local transform and MiningSchema fields
    HashMap<FieldName, FieldName> reversMiningTransformMap = new HashMap<FieldName, FieldName>();
    HashMap<FieldName, List<FieldName>> treeMapOfTransform = new HashMap<FieldName, List<FieldName>>();
    for (DerivedField dField : model.getLocalTransformations().getDerivedFields()) {
        // Apply z-scale normalization on numerical variables
        FieldName parentField = null;
        if (dField.getExpression() instanceof NormContinuous) {
            parentField = ((NormContinuous) dField.getExpression()).getField();
            reversMiningTransformMap.put(dField.getName(), parentField);
        } else // Apply bin map on categorical variables
        if (dField.getExpression() instanceof MapValues) {
            parentField = ((MapValues) dField.getExpression()).getFieldColumnPairs().get(0).getField();
            reversMiningTransformMap.put(dField.getName(), parentField);
        } else if (dField.getExpression() instanceof Discretize) {
            parentField = ((Discretize) dField.getExpression()).getField();
            reversMiningTransformMap.put(dField.getName(), parentField);
        }
        List<FieldName> fieldNames = treeMapOfTransform.get(parentField);
        if (fieldNames == null) {
            fieldNames = new ArrayList<FieldName>();
        }
        fieldNames.add(dField.getName());
        treeMapOfTransform.put(parentField, fieldNames);
    }
    // comment here
    List<MiningField> miningList = model.getMiningSchema().getMiningFields();
    int index = 0;
    for (DerivedField dField : model.getLocalTransformations().getDerivedFields()) {
        List<FieldName> list = treeMapOfTransform.get(dField.getName());
        boolean isLeaf = (list == null || list.size() == 0);
        FieldName root = getRoot(dField.getName(), reversMiningTransformMap);
        if (isLeaf && isRootInMiningList(root, miningList)) {
            DerivedField field = new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE).setName(dField.getName()).setExpression(new FieldRef(dField.getName()));
            nnInputs.addNeuralInputs(new NeuralInput("0," + (index++), field));
        }
    }
    DerivedField field = new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE).setName(new FieldName(PluginConstants.biasValue)).setExpression(new FieldRef(new FieldName(PluginConstants.biasValue)));
    nnInputs.addNeuralInputs(new NeuralInput(PluginConstants.biasValue, field));
    return nnInputs;
}
Also used : NormContinuous(org.dmg.pmml.NormContinuous) MiningField(org.dmg.pmml.MiningField) FieldRef(org.dmg.pmml.FieldRef) NeuralInputs(org.dmg.pmml.neural_network.NeuralInputs) HashMap(java.util.HashMap) MapValues(org.dmg.pmml.MapValues) Discretize(org.dmg.pmml.Discretize) ArrayList(java.util.ArrayList) List(java.util.List) FieldName(org.dmg.pmml.FieldName) DerivedField(org.dmg.pmml.DerivedField) NeuralInput(org.dmg.pmml.neural_network.NeuralInput)

Aggregations

Discretize (org.dmg.pmml.Discretize)5 DerivedField (org.dmg.pmml.DerivedField)4 ArrayList (java.util.ArrayList)3 DiscretizeBin (org.dmg.pmml.DiscretizeBin)3 Interval (org.dmg.pmml.Interval)3 HashMap (java.util.HashMap)2 FieldName (org.dmg.pmml.FieldName)2 MapValues (org.dmg.pmml.MapValues)2 MiningField (org.dmg.pmml.MiningField)2 NormContinuous (org.dmg.pmml.NormContinuous)2 List (java.util.List)1 Bucketizer (org.apache.spark.ml.feature.Bucketizer)1 FieldRef (org.dmg.pmml.FieldRef)1 LocalTransformations (org.dmg.pmml.LocalTransformations)1 NeuralInput (org.dmg.pmml.neural_network.NeuralInput)1 NeuralInputs (org.dmg.pmml.neural_network.NeuralInputs)1 NumericPredictor (org.dmg.pmml.regression.NumericPredictor)1 RegressionTable (org.dmg.pmml.regression.RegressionTable)1 CategoricalFeature (org.jpmml.converter.CategoricalFeature)1 ContinuousFeature (org.jpmml.converter.ContinuousFeature)1