Use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.
In class VectorIndexerModelConverter, method encodeFeatures.
/**
 * Encodes the output features of a fitted {@code VectorIndexerModel}.
 *
 * <p>For every input feature that has an entry in the transformer's category maps, the
 * feature is declared categorical on the encoder and replaced by a categorical feature backed
 * by an integer-typed {@code MapValues} derived field. Every other input feature is added to
 * the result after a cast to {@code ContinuousFeature}.
 *
 * @param encoder the encoder that supplies the input features and creates derived fields
 * @return one output feature per input feature, in input order
 * @throws IllegalArgumentException if the number of input features differs from
 *         {@code transformer.numFeatures()}
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    VectorIndexerModel transformer = getTransformer();

    List<Feature> inputFeatures = encoder.getFeatures(transformer.getInputCol());
    int expectedCount = transformer.numFeatures();
    if (inputFeatures.size() != expectedCount) {
        throw new IllegalArgumentException("Expected " + expectedCount + " features, got " + inputFeatures.size() + " features");
    }

    Map<Integer, Map<Double, Integer>> categoryMaps = transformer.javaCategoryMaps();
    List<Feature> encoded = new ArrayList<>();

    for (int index = 0; index < expectedCount; index++) {
        Feature inputFeature = inputFeatures.get(index);
        Map<Double, Integer> mapping = categoryMaps.get(index);

        // No category map for this position: the feature is not indexed.
        if (mapping == null) {
            encoded.add((ContinuousFeature) inputFeature);
            continue;
        }

        List<String> inputValues = new ArrayList<>();
        List<String> outputValues = new ArrayList<>();
        DocumentBuilder documentBuilder = DOMUtil.createDocumentBuilder();
        InlineTable lookupTable = new InlineTable();
        List<String> columnNames = Arrays.asList("input", "output");

        // Rows are emitted in the order defined by the converter's COMPARATOR.
        List<Map.Entry<Double, Integer>> sortedEntries = new ArrayList<>(mapping.entrySet());
        Collections.sort(sortedEntries, VectorIndexerModelConverter.COMPARATOR);

        for (Map.Entry<Double, Integer> entry : sortedEntries) {
            String inputValue = ValueUtil.formatValue(entry.getKey());
            String outputValue = ValueUtil.formatValue(entry.getValue());
            inputValues.add(inputValue);
            outputValues.add(outputValue);
            lookupTable.addRows(DOMUtil.createRow(documentBuilder, columnNames, Arrays.asList(inputValue, outputValue)));
        }

        // The raw input must be categorical before it can be looked up in the table.
        encoder.toCategorical(inputFeature.getName(), inputValues);

        FieldColumnPair inputPair = new FieldColumnPair(inputFeature.getName(), columnNames.get(0));
        MapValues mapValues = new MapValues()
            .addFieldColumnPairs(inputPair)
            .setOutputColumn(columnNames.get(1))
            .setInlineTable(lookupTable);

        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, index), OpType.CATEGORICAL, DataType.INTEGER, mapValues);
        encoded.add(new CategoricalFeature(encoder, derivedField, outputValues));
    }

    return encoded;
}
Use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.
In class BinarizerConverter, method encodeFeatures.
/**
 * Encodes the output feature of a {@code Binarizer}.
 *
 * <p>The single input feature is converted to a continuous feature and wrapped in a PMML
 * {@code if} expression with the arguments {@code lessOrEqual(input, threshold)}, {@code 0}
 * and {@code 1}. The resulting derived field is categorical with the categories
 * {@code "0"} and {@code "1"}.
 *
 * @param encoder the encoder that supplies the input feature and creates the derived field
 * @return a single-element list holding the binarized categorical feature
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Binarizer transformer = getTransformer();

    ContinuousFeature input = encoder.getOnlyFeature(transformer.getInputCol()).toContinuousFeature();

    // if(input <= threshold, 0, 1)
    Apply binarized = new Apply("if");
    binarized.addExpressions(PMMLUtil.createApply("lessOrEqual", input.ref(), PMMLUtil.createConstant(transformer.getThreshold())));
    binarized.addExpressions(PMMLUtil.createConstant(0d), PMMLUtil.createConstant(1d));

    DerivedField derivedField = encoder.createDerivedField(formatName(transformer), OpType.CATEGORICAL, DataType.DOUBLE, binarized);

    Feature output = new CategoricalFeature(encoder, derivedField, Arrays.asList("0", "1"));
    return Collections.singletonList(output);
}
Use of org.dmg.pmml.DerivedField in project shifu by ShifuML.
In class PMMLAdapterCommonUtil, method getOutputFields.
/**
 * Creates the PMML neural outputs for a neural network model.
 *
 * <p>One {@code NeuralOutput} is added for every field of the mining schema whose usage type
 * is {@code TARGET}. Each output refers to its target field through a continuous, double-typed
 * derived field and is linked to the output neuron with id {@code "<layerID>,<index>"}, where
 * {@code index} is the zero-based position of the target field.
 *
 * @param schema
 *            the mining schema from which the target fields are read
 * @param layerID
 *            the layer in which the output neurons lie
 * @return the neural outputs, with the number of outputs set to the number of target fields
 */
public static NeuralOutputs getOutputFields(final MiningSchema schema, final int layerID) {
    final List<String> targetNames = getSchemaFieldViaUsageType(schema, UsageType.TARGET);

    final NeuralOutputs neuralOutputs = new NeuralOutputs();
    neuralOutputs.setNumberOfOutputs(targetNames.size());

    int index = 0;
    for (String targetName : targetNames) {
        DerivedField targetField = new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE);
        targetField.setExpression(new FieldRef(new FieldName(targetName)));

        String neuronId = layerID + "," + index;
        neuralOutputs.addNeuralOutputs(new NeuralOutput(neuronId, targetField));
        index++;
    }

    return neuralOutputs;
}
Use of org.dmg.pmml.DerivedField in project shifu by ShifuML.
In class WoeZscoreLocalTransformCreator, method createNumericalDerivedField.
/**
 * Creates the derived fields for a numerical variable.
 *
 * <p>The first field is the one produced by the parent implementation for
 * {@code NormType.WOE}. The second field applies a capped linear normalization to the first:
 * {@code mean - stdDev * cutoff} maps to {@code -cutoff} and {@code mean + stdDev * cutoff}
 * maps to {@code cutoff}, where mean and standard deviation come from
 * {@code Normalizer.calculateWoeMeanAndStdDev}. Outliers are treated as extreme values and
 * missing input maps to {@code 0.0}.
 *
 * @param config - ColumnConfig for the numerical variable
 * @param cutoff - cutoff of the normalization
 * @param normType - normalization type used to generate the name of the second field
 * @return a list holding the WOE field followed by the normalized field
 */
@Override
protected List<DerivedField> createNumericalDerivedField(ColumnConfig config, double cutoff, ModelNormalizeConf.NormType normType) {
    // The parent always builds the WOE field here, regardless of the requested normType.
    DerivedField woeField = super.createNumericalDerivedField(config, cutoff, ModelNormalizeConf.NormType.WOE).get(0);

    double[] woeStats = Normalizer.calculateWoeMeanAndStdDev(config, isWeightedNorm);
    double woeMean = woeStats[0];
    double woeStdDev = woeStats[1];

    // The two anchor points of the linear norm double as the capping bounds.
    LinearNorm lowerCap = new LinearNorm();
    lowerCap.setOrig(woeMean - woeStdDev * cutoff);
    lowerCap.setNorm(-cutoff);

    LinearNorm upperCap = new LinearNorm();
    upperCap.setOrig(woeMean + woeStdDev * cutoff);
    upperCap.setNorm(cutoff);

    NormContinuous scaling = new NormContinuous();
    scaling.setField(FieldName.create(woeField.getName().getValue()));
    scaling.addLinearNorms(lowerCap, upperCap);
    scaling.setMapMissingTo(0.0);
    scaling.setOutliers(OutlierTreatmentMethod.AS_EXTREME_VALUES);

    // The output name is generated from the simple column name and the normalization type.
    String pmmlName = genPmmlColumnName(NormalUtils.getSimpleColumnName(config.getColumnName()), normType);
    DerivedField scaledField = new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE);
    scaledField.setName(FieldName.create(pmmlName));
    scaledField.setExpression(scaling);

    List<DerivedField> derivedFields = new ArrayList<DerivedField>();
    derivedFields.add(woeField);
    derivedFields.add(scaledField);
    return derivedFields;
}
Use of org.dmg.pmml.DerivedField in project shifu by ShifuML.
In class ZscoreLocalTransformCreator, method createNumericalDerivedField.
/**
 * Creates the derived field for a numerical variable.
 *
 * <p>The field applies a capped linear normalization to the input column:
 * {@code mean - stdDev * cutoff} maps to {@code -cutoff} and {@code mean + stdDev * cutoff}
 * maps to {@code cutoff}, using the mean and standard deviation of the column config.
 * Outliers are treated as extreme values and missing input maps to {@code 0.0}.
 *
 * @param config
 *            - ColumnConfig for the numerical variable
 * @param cutoff
 *            - cutoff of the normalization
 * @param normType
 *            - the normalization method; used to generate the name of the derived field
 * @return a single-element list holding the derived field for the variable
 */
protected List<DerivedField> createNumericalDerivedField(ColumnConfig config, double cutoff, ModelNormalizeConf.NormType normType) {
    // The two anchor points of the linear norm double as the capping bounds.
    LinearNorm lowerCap = new LinearNorm();
    lowerCap.setOrig(config.getMean() - config.getStdDev() * cutoff);
    lowerCap.setNorm(-cutoff);

    LinearNorm upperCap = new LinearNorm();
    upperCap.setOrig(config.getMean() + config.getStdDev() * cutoff);
    upperCap.setNorm(cutoff);

    NormContinuous scaling = new NormContinuous();
    scaling.setField(FieldName.create(NormalUtils.getSimpleColumnName(config, columnConfigList, segmentExpansions, datasetHeaders)));
    scaling.addLinearNorms(lowerCap, upperCap);
    scaling.setMapMissingTo(0.0);
    scaling.setOutliers(OutlierTreatmentMethod.AS_EXTREME_VALUES);

    // The output name is generated from the simple column name and the normalization type.
    String pmmlName = genPmmlColumnName(NormalUtils.getSimpleColumnName(config.getColumnName()), normType);
    DerivedField scaledField = new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE);
    scaledField.setName(FieldName.create(pmmlName));
    scaledField.setExpression(scaling);

    List<DerivedField> derivedFields = new ArrayList<DerivedField>();
    derivedFields.add(scaledField);
    return derivedFields;
}
Aggregations