Use of org.dmg.pmml.Discretize in project jpmml-r by jpmml.
In the class FormulaUtil, the method createDiscretize:
private static Discretize createDiscretize(FieldName name, List<String> categories) {
    Discretize discretize = new Discretize(name);
    for (String category : categories) {
        Interval interval = ExpressionTranslator.translateInterval(category);
        DiscretizeBin discretizeBin = new DiscretizeBin(category, interval);
        discretize.addDiscretizeBins(discretizeBin);
    }
    return discretize;
}
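For orientation, here is a minimal standalone sketch (not taken from jpmml-r) that builds the same kind of Discretize by hand. The field name "x", the hard-coded Interval objects, and the synthetic bin labels are illustrative assumptions standing in for the output of ExpressionTranslator.translateInterval, where the bin value would be the original R category string.

import java.util.Arrays;
import java.util.List;

import org.dmg.pmml.Discretize;
import org.dmg.pmml.DiscretizeBin;
import org.dmg.pmml.FieldName;
import org.dmg.pmml.Interval;

public class DiscretizeSketch {

    public static void main(String[] args) {
        // One bin per interval-valued category, mirroring createDiscretize above.
        Discretize discretize = new Discretize(FieldName.create("x"));
        List<Interval> intervals = Arrays.asList(
            new Interval(Interval.Closure.OPEN_CLOSED).setRightMargin(0d),                   // (-inf, 0]
            new Interval(Interval.Closure.OPEN_CLOSED).setLeftMargin(0d).setRightMargin(1d), // (0, 1]
            new Interval(Interval.Closure.OPEN_OPEN).setLeftMargin(1d));                     // (1, +inf)
        int index = 0;
        for (Interval interval : intervals) {
            // Synthetic labels; jpmml-r uses the R interval string as the bin value.
            discretize.addDiscretizeBins(new DiscretizeBin("bin" + (index++), interval));
        }
        System.out.println(discretize.getDiscretizeBins().size() + " bins");
    }
}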
Use of org.dmg.pmml.Discretize in project jpmml-sparkml by jpmml.
In the class BucketizerConverter, the method encodeFeatures:
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Bucketizer transformer = getTransformer();
    Feature feature = encoder.getOnlyFeature(transformer.getInputCol());
    ContinuousFeature continuousFeature = feature.toContinuousFeature();
    Discretize discretize = new Discretize(continuousFeature.getName());
    List<String> categories = new ArrayList<>();
    double[] splits = transformer.getSplits();
    for (int i = 0; i < (splits.length - 1); i++) {
        String category = String.valueOf(i);
        categories.add(category);
        Interval interval = new Interval((i < (splits.length - 2)) ? Interval.Closure.CLOSED_OPEN : Interval.Closure.CLOSED_CLOSED)
            .setLeftMargin(formatMargin(splits[i]))
            .setRightMargin(formatMargin(splits[i + 1]));
        DiscretizeBin discretizeBin = new DiscretizeBin(category, interval);
        discretize.addDiscretizeBins(discretizeBin);
    }
    DerivedField derivedField = encoder.createDerivedField(formatName(transformer), OpType.CATEGORICAL, DataType.INTEGER, discretize);
    return Collections.<Feature>singletonList(new CategoricalFeature(encoder, derivedField, categories));
}
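A quick sketch (illustrative split values, console output instead of PMML) of how the loop above turns N+1 Bucketizer splits into N labelled bins, with every bin left-closed/right-open except the last, which is closed on both sides:

import org.dmg.pmml.Interval;

public class BucketizerSplitsSketch {

    public static void main(String[] args) {
        // Illustrative splits; Spark requires them to be strictly increasing.
        double[] splits = {Double.NEGATIVE_INFINITY, 0d, 10d, Double.POSITIVE_INFINITY};
        for (int i = 0; i < (splits.length - 1); i++) {
            Interval.Closure closure = (i < (splits.length - 2)) ? Interval.Closure.CLOSED_OPEN : Interval.Closure.CLOSED_CLOSED;
            System.out.println("bin \"" + i + "\": " + closure + " from " + splits[i] + " to " + splits[i + 1]);
        }
    }
}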
Use of org.dmg.pmml.Discretize in project shifu by ShifuML.
In the class PMMLLRModelBuilder, the method adaptMLModelToPMML:
public RegressionModel adaptMLModelToPMML(ml.shifu.shifu.core.LR lr, RegressionModel pmmlModel) {
    pmmlModel.setNormalizationMethod(NormalizationMethod.LOGIT);
    pmmlModel.setMiningFunction(MiningFunction.REGRESSION);
    RegressionTable table = new RegressionTable();
    table.setIntercept(lr.getBias());
    LocalTransformations lt = pmmlModel.getLocalTransformations();
    List<DerivedField> df = lt.getDerivedFields();
    HashMap<FieldName, FieldName> miningTransformMap = new HashMap<FieldName, FieldName>();
    for (DerivedField dField : df) {
        if (dField.getExpression() instanceof NormContinuous) {
            // Apply z-scale normalization on numerical variables
            miningTransformMap.put(((NormContinuous) dField.getExpression()).getField(), dField.getName());
        } else if (dField.getExpression() instanceof MapValues) {
            // Apply bin map on categorical variables
            miningTransformMap.put(((MapValues) dField.getExpression()).getFieldColumnPairs().get(0).getField(), dField.getName());
        } else if (dField.getExpression() instanceof Discretize) {
            miningTransformMap.put(((Discretize) dField.getExpression()).getField(), dField.getName());
        }
    }
    List<MiningField> miningList = pmmlModel.getMiningSchema().getMiningFields();
    int index = 0;
    // Add one NumericPredictor per active mining field, pointing at its final derived field
    for (int i = 0; i < miningList.size(); i++) {
        MiningField mField = miningList.get(i);
        if (mField.getUsageType() != UsageType.ACTIVE) {
            continue;
        }
        FieldName mFieldName = mField.getName();
        FieldName fName = mFieldName;
        // Follow the transform chain until the last derived field is reached
        while (miningTransformMap.containsKey(fName)) {
            fName = miningTransformMap.get(fName);
        }
        NumericPredictor np = new NumericPredictor();
        np.setName(fName);
        np.setCoefficient(lr.getWeights()[index++]);
        table.addNumericPredictors(np);
    }
    pmmlModel.addRegressionTables(table);
    return pmmlModel;
}
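The while loop is the key step: it resolves a raw mining field to the last derived field in its transform chain. A minimal sketch with hypothetical field names and plain strings in place of FieldName:

import java.util.HashMap;
import java.util.Map;

public class TransformChainSketch {

    public static void main(String[] args) {
        // Hypothetical raw-field -> derived-field links, mirroring miningTransformMap above.
        Map<String, String> transformMap = new HashMap<>();
        transformMap.put("age", "age_woe");
        transformMap.put("age_woe", "age_woe_zscl");
        String field = "age";
        // Follow the chain until no further derived field exists.
        while (transformMap.containsKey(field)) {
            field = transformMap.get(field);
        }
        System.out.println(field); // prints "age_woe_zscl"
    }
}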
Use of org.dmg.pmml.Discretize in project shifu by ShifuML.
In the class WoeLocalTransformCreator, the method createNumericalDerivedField:
/**
 * Create @DerivedField for numerical variable
 *
 * @param config
 *            - ColumnConfig for numerical variable
 * @param cutoff
 *            - cutoff of normalization
 * @param normType
 *            - the normalization method that is used to generate DerivedField
 * @return DerivedField for variable
 */
@Override
protected List<DerivedField> createNumericalDerivedField(ColumnConfig config, double cutoff, ModelNormalizeConf.NormType normType) {
    List<Double> binWoeList = (normType.equals(ModelNormalizeConf.NormType.WOE) ? config.getBinCountWoe() : config.getBinWeightedWoe());
    List<Double> binBoundaryList = config.getBinBoundary();
    List<DiscretizeBin> discretizeBinList = new ArrayList<DiscretizeBin>();
    for (int i = 0; i < binBoundaryList.size(); i++) {
        DiscretizeBin discretizeBin = new DiscretizeBin();
        Interval interval = new Interval();
        if (i == 0) {
            if (binBoundaryList.size() == 1) {
                interval.setClosure(Interval.Closure.OPEN_OPEN)
                        .setLeftMargin(Double.NEGATIVE_INFINITY)
                        .setRightMargin(Double.POSITIVE_INFINITY);
            } else {
                interval.setClosure(Interval.Closure.OPEN_OPEN)
                        .setRightMargin(binBoundaryList.get(i + 1));
            }
        } else if (i == binBoundaryList.size() - 1) {
            interval.setClosure(Interval.Closure.CLOSED_OPEN)
                    .setLeftMargin(binBoundaryList.get(i));
        } else {
            interval.setClosure(Interval.Closure.CLOSED_OPEN)
                    .setLeftMargin(binBoundaryList.get(i))
                    .setRightMargin(binBoundaryList.get(i + 1));
        }
        discretizeBin.setInterval(interval).setBinValue(Double.toString(binWoeList.get(i)));
        discretizeBinList.add(discretizeBin);
    }
    Discretize discretize = new Discretize();
    discretize.setDataType(DataType.DOUBLE)
            .setField(FieldName.create(NormalUtils.getSimpleColumnName(config, columnConfigList, segmentExpansions, datasetHeaders)))
            .setMapMissingTo(Normalizer.normalize(config, null, cutoff, normType).get(0).toString())
            .setDefaultValue(Normalizer.normalize(config, null, cutoff, normType).get(0).toString())
            .addDiscretizeBins(discretizeBinList.toArray(new DiscretizeBin[discretizeBinList.size()]));
    // the derived field name consists of the FieldName and the "_zscl" suffix
    List<DerivedField> derivedFields = new ArrayList<DerivedField>();
    derivedFields.add(new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE)
            .setName(FieldName.create(genPmmlColumnName(NormalUtils.getSimpleColumnName(config.getColumnName()), normType)))
            .setExpression(discretize));
    return derivedFields;
}
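As a compact illustration (hypothetical field name, bin boundaries, and WOE values; all bins simplified to CLOSED_OPEN, unlike the edge-case handling above), this sketch shows the Discretize attributes the method sets, including mapMissingTo and defaultValue:

import java.util.Arrays;
import java.util.List;

import org.dmg.pmml.DataType;
import org.dmg.pmml.Discretize;
import org.dmg.pmml.DiscretizeBin;
import org.dmg.pmml.FieldName;
import org.dmg.pmml.Interval;

public class WoeDiscretizeSketch {

    public static void main(String[] args) {
        // Hypothetical boundaries and WOE values; shifu derives the real ones from ColumnConfig.
        List<Double> binBoundaries = Arrays.asList(Double.NEGATIVE_INFINITY, 10d, 20d);
        List<Double> binWoe = Arrays.asList(-0.25, 0.10, 0.40);
        Discretize discretize = new Discretize()
                .setField(FieldName.create("amount"))
                .setDataType(DataType.DOUBLE)
                .setMapMissingTo("0.0")
                .setDefaultValue("0.0");
        for (int i = 0; i < binBoundaries.size(); i++) {
            Interval interval = new Interval(Interval.Closure.CLOSED_OPEN)
                    .setLeftMargin(binBoundaries.get(i))
                    .setRightMargin(i + 1 < binBoundaries.size() ? binBoundaries.get(i + 1) : null);
            // The bin value carries the WOE score that replaces raw values falling into the bin.
            discretize.addDiscretizeBins(new DiscretizeBin(Double.toString(binWoe.get(i)), interval));
        }
        System.out.println(discretize.getDiscretizeBins().size() + " WOE bins");
    }
}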
Use of org.dmg.pmml.Discretize in project shifu by ShifuML.
In the class NeuralNetworkModelIntegrator, the method getNeuralInputs:
private NeuralInputs getNeuralInputs(final NeuralNetwork model) {
    NeuralInputs nnInputs = new NeuralInputs();
    // get HashMap for local transform and MiningSchema fields
    HashMap<FieldName, FieldName> reversMiningTransformMap = new HashMap<FieldName, FieldName>();
    HashMap<FieldName, List<FieldName>> treeMapOfTransform = new HashMap<FieldName, List<FieldName>>();
    for (DerivedField dField : model.getLocalTransformations().getDerivedFields()) {
        FieldName parentField = null;
        if (dField.getExpression() instanceof NormContinuous) {
            // Apply z-scale normalization on numerical variables
            parentField = ((NormContinuous) dField.getExpression()).getField();
            reversMiningTransformMap.put(dField.getName(), parentField);
        } else if (dField.getExpression() instanceof MapValues) {
            // Apply bin map on categorical variables
            parentField = ((MapValues) dField.getExpression()).getFieldColumnPairs().get(0).getField();
            reversMiningTransformMap.put(dField.getName(), parentField);
        } else if (dField.getExpression() instanceof Discretize) {
            parentField = ((Discretize) dField.getExpression()).getField();
            reversMiningTransformMap.put(dField.getName(), parentField);
        }
        List<FieldName> fieldNames = treeMapOfTransform.get(parentField);
        if (fieldNames == null) {
            fieldNames = new ArrayList<FieldName>();
        }
        fieldNames.add(dField.getName());
        treeMapOfTransform.put(parentField, fieldNames);
    }
    // Add one NeuralInput per leaf derived field whose root field is active in the MiningSchema
    List<MiningField> miningList = model.getMiningSchema().getMiningFields();
    int index = 0;
    for (DerivedField dField : model.getLocalTransformations().getDerivedFields()) {
        List<FieldName> list = treeMapOfTransform.get(dField.getName());
        boolean isLeaf = (list == null || list.size() == 0);
        FieldName root = getRoot(dField.getName(), reversMiningTransformMap);
        if (isLeaf && isRootInMiningList(root, miningList)) {
            DerivedField field = new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE)
                    .setName(dField.getName())
                    .setExpression(new FieldRef(dField.getName()));
            nnInputs.addNeuralInputs(new NeuralInput("0," + (index++), field));
        }
    }
    // Append the bias value as an extra neural input
    DerivedField field = new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE)
            .setName(new FieldName(PluginConstants.biasValue))
            .setExpression(new FieldRef(new FieldName(PluginConstants.biasValue)));
    nnInputs.addNeuralInputs(new NeuralInput(PluginConstants.biasValue, field));
    return nnInputs;
}
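getRoot(..) is not shown in this excerpt; the sketch below is a plausible, hypothetical stand-in (plain strings instead of FieldName, not shifu's actual implementation) that walks the reverse transform map back to the raw field:

import java.util.HashMap;
import java.util.Map;

public class RootFieldSketch {

    // Hypothetical helper: follow derived-field -> parent links until a raw field is reached.
    static String getRoot(String field, Map<String, String> reverseTransformMap) {
        while (reverseTransformMap.containsKey(field)) {
            field = reverseTransformMap.get(field);
        }
        return field;
    }

    public static void main(String[] args) {
        Map<String, String> reverseTransformMap = new HashMap<>();
        reverseTransformMap.put("age_woe", "age");
        reverseTransformMap.put("age_woe_zscl", "age_woe");
        System.out.println(getRoot("age_woe_zscl", reverseTransformMap)); // prints "age"
    }
}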