Search in sources :

Example 6 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.

the class VectorIndexerModelConverter method encodeFeatures.

/**
 * Encodes the features of the transformer's input column, one output feature per input position.
 *
 * <p>For every position that has an entry in {@code javaCategoryMaps()}, the input field is
 * declared categorical on the encoder and a new integer-typed categorical derived field is
 * created from a {@code MapValues} lookup over an inline "input"/"output" table. Positions
 * without an entry are cast to {@code ContinuousFeature} and passed through unchanged.
 *
 * @param encoder the encoder that supplies the input features and registers derived fields
 * @return the encoded features, in the same order as the input features
 * @throws IllegalArgumentException if the number of input features differs from
 *         {@code transformer.numFeatures()}
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    VectorIndexerModel indexerModel = getTransformer();

    List<Feature> inputFeatures = encoder.getFeatures(indexerModel.getInputCol());
    int expectedCount = indexerModel.numFeatures();
    if (inputFeatures.size() != expectedCount) {
        throw new IllegalArgumentException("Expected " + expectedCount + " features, got " + inputFeatures.size() + " features");
    }

    Map<Integer, Map<Double, Integer>> mappingsByIndex = indexerModel.javaCategoryMaps();
    List<Feature> encodedFeatures = new ArrayList<>();

    for (int index = 0; index < expectedCount; index++) {
        Feature inputFeature = inputFeatures.get(index);
        Map<Double, Integer> mapping = mappingsByIndex.get(index);

        // No category map for this position: the feature is passed through as continuous.
        if (mapping == null) {
            encodedFeatures.add((ContinuousFeature) inputFeature);
            continue;
        }

        List<String> inputCategories = new ArrayList<>();
        List<String> outputValues = new ArrayList<>();
        DocumentBuilder rowBuilder = DOMUtil.createDocumentBuilder();
        InlineTable lookupTable = new InlineTable();
        List<String> columnNames = Arrays.asList("input", "output");

        // Rows are emitted in the order defined by the converter's COMPARATOR.
        List<Map.Entry<Double, Integer>> sortedEntries = new ArrayList<>(mapping.entrySet());
        Collections.sort(sortedEntries, VectorIndexerModelConverter.COMPARATOR);

        for (Map.Entry<Double, Integer> mappingEntry : sortedEntries) {
            String inputCategory = ValueUtil.formatValue(mappingEntry.getKey());
            inputCategories.add(inputCategory);

            String outputValue = ValueUtil.formatValue(mappingEntry.getValue());
            outputValues.add(outputValue);

            lookupTable.addRows(DOMUtil.createRow(rowBuilder, columnNames, Arrays.asList(inputCategory, outputValue)));
        }

        // The input field must be registered as categorical before the lookup field is derived from it.
        encoder.toCategorical(inputFeature.getName(), inputCategories);

        FieldColumnPair inputPair = new FieldColumnPair(inputFeature.getName(), columnNames.get(0));
        MapValues lookup = new MapValues()
            .addFieldColumnPairs(inputPair)
            .setOutputColumn(columnNames.get(1))
            .setInlineTable(lookupTable);

        DerivedField indexedField = encoder.createDerivedField(formatName(indexerModel, index), OpType.CATEGORICAL, DataType.INTEGER, lookup);
        encodedFeatures.add(new CategoricalFeature(encoder, indexedField, outputValues));
    }

    return encodedFeatures;
}
Also used : InlineTable(org.dmg.pmml.InlineTable) ArrayList(java.util.ArrayList) FieldColumnPair(org.dmg.pmml.FieldColumnPair) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) DocumentBuilder(javax.xml.parsers.DocumentBuilder) MapValues(org.dmg.pmml.MapValues) VectorIndexerModel(org.apache.spark.ml.feature.VectorIndexerModel) Row(org.dmg.pmml.Row) Map(java.util.Map) DerivedField(org.dmg.pmml.DerivedField)

Example 7 with DerivedField

use of org.dmg.pmml.DerivedField in project jpmml-sparkml by jpmml.

the class BinarizerConverter method encodeFeatures.

/**
 * Encodes the single input feature of the transformer as a two-valued categorical feature.
 *
 * <p>The derived field evaluates {@code if(lessOrEqual(input, threshold), 0, 1)}: inputs at or
 * below the transformer's threshold yield {@code 0}, all others yield {@code 1}.
 *
 * @param encoder the encoder that supplies the input feature and registers the derived field
 * @return a singleton list holding the categorical feature with categories "0" and "1"
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Binarizer binarizer = getTransformer();

    Feature inputFeature = encoder.getOnlyFeature(binarizer.getInputCol());
    ContinuousFeature numericInput = inputFeature.toContinuousFeature();

    Apply thresholdTest = new Apply("if")
        .addExpressions(PMMLUtil.createApply("lessOrEqual", numericInput.ref(), PMMLUtil.createConstant(binarizer.getThreshold())))
        .addExpressions(PMMLUtil.createConstant(0d), PMMLUtil.createConstant(1d));

    DerivedField binarizedField = encoder.createDerivedField(formatName(binarizer), OpType.CATEGORICAL, DataType.DOUBLE, thresholdTest);

    Feature binarizedFeature = new CategoricalFeature(encoder, binarizedField, Arrays.asList("0", "1"));
    return Collections.singletonList(binarizedFeature);
}
Also used : ContinuousFeature(org.jpmml.converter.ContinuousFeature) Apply(org.dmg.pmml.Apply) Binarizer(org.apache.spark.ml.feature.Binarizer) Feature(org.jpmml.converter.Feature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) DerivedField(org.dmg.pmml.DerivedField)

Example 8 with DerivedField

use of org.dmg.pmml.DerivedField in project shifu by ShifuML.

the class PMMLAdapterCommonUtil method getOutputFields.

/**
 * Builds the PMML {@code NeuralOutputs} element for a neural network model.
 *
 * <p>One {@code NeuralOutput} is added for each field name returned by
 * {@code getSchemaFieldViaUsageType(schema, UsageType.TARGET)}. Each output carries a
 * continuous, double-typed {@code DerivedField} whose expression is a {@code FieldRef} to that
 * field, and is identified by the string {@code "<layerID>,<index>"}, where the index is the
 * zero-based position of the field name in that list.
 *
 * @param schema
 *            the mining schema whose target fields become neural outputs
 * @param layerID
 *            the layer number used as the first part of each output neuron identifier
 * @return the neural outputs, with the number of outputs set to the number of target fields
 */
public static NeuralOutputs getOutputFields(final MiningSchema schema, final int layerID) {
    final List<String> targetNames = getSchemaFieldViaUsageType(schema, UsageType.TARGET);

    final NeuralOutputs neuralOutputs = new NeuralOutputs();
    neuralOutputs.setNumberOfOutputs(targetNames.size());

    int neuronIndex = 0;
    for (final String targetName : targetNames) {
        final DerivedField targetField = new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE);
        targetField.setExpression(new FieldRef(new FieldName(targetName)));

        final String neuronId = layerID + "," + neuronIndex;
        neuralOutputs.addNeuralOutputs(new NeuralOutput(neuronId, targetField));
        neuronIndex++;
    }

    return neuralOutputs;
}
Also used : FieldRef(org.dmg.pmml.FieldRef) DerivedField(org.dmg.pmml.DerivedField) FieldName(org.dmg.pmml.FieldName)

Example 9 with DerivedField

use of org.dmg.pmml.DerivedField in project shifu by ShifuML.

the class WoeZscoreLocalTransformCreator method createNumericalDerivedField.

/**
 * Creates the derived fields for a numerical variable: the first field produced by the
 * superclass for {@code NormType.WOE}, followed by a {@code NormContinuous} field that
 * rescales that first field.
 *
 * <p>The rescaling is defined by two linear-norm points, {@code (m - s * cutoff, -cutoff)} and
 * {@code (m + s * cutoff, cutoff)}, where {@code m} and {@code s} are elements 0 and 1 of the
 * array returned by {@code Normalizer.calculateWoeMeanAndStdDev}. Missing values map to
 * {@code 0.0} and outliers are treated as extreme values.
 *
 * @param config - ColumnConfig for numerical variable
 * @param cutoff - cutoff of normalization
 * @param normType - normalization type; used here only to generate the name of the second
 *            derived field (the superclass is always called with {@code NormType.WOE})
 * @return a list holding the superclass field followed by the rescaled field
 */
@Override
protected List<DerivedField> createNumericalDerivedField(ColumnConfig config, double cutoff, ModelNormalizeConf.NormType normType) {
    List<DerivedField> result = new ArrayList<DerivedField>();

    // Only the first field produced by the superclass is kept.
    DerivedField woeField = super.createNumericalDerivedField(config, cutoff, ModelNormalizeConf.NormType.WOE).get(0);
    result.add(woeField);

    // Presumably [mean, standard deviation], going by the helper's name.
    double[] woeStats = Normalizer.calculateWoeMeanAndStdDev(config, isWeightedNorm);
    double woeMean = woeStats[0];
    double woeStdDev = woeStats[1];

    // The two anchor points cap the normalized value at +/- cutoff.
    LinearNorm lowerBound = new LinearNorm().setOrig(woeMean - woeStdDev * cutoff).setNorm(-cutoff);
    LinearNorm upperBound = new LinearNorm().setOrig(woeMean + woeStdDev * cutoff).setNorm(cutoff);

    NormContinuous zscoreOfWoe = new NormContinuous();
    zscoreOfWoe.setField(FieldName.create(woeField.getName().getValue()));
    zscoreOfWoe.addLinearNorms(lowerBound, upperBound);
    zscoreOfWoe.setMapMissingTo(0.0);
    zscoreOfWoe.setOutliers(OutlierTreatmentMethod.AS_EXTREME_VALUES);

    // The output name is generated from the simple column name and the requested norm type.
    result.add(
        new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE)
            .setName(FieldName.create(genPmmlColumnName(NormalUtils.getSimpleColumnName(config.getColumnName()), normType)))
            .setExpression(zscoreOfWoe));

    return result;
}
Also used : NormContinuous(org.dmg.pmml.NormContinuous) LinearNorm(org.dmg.pmml.LinearNorm) ArrayList(java.util.ArrayList) DerivedField(org.dmg.pmml.DerivedField)

Example 10 with DerivedField

use of org.dmg.pmml.DerivedField in project shifu by ShifuML.

the class ZscoreLocalTransformCreator method createNumericalDerivedField.

/**
 * Creates the derived field for a numerical variable as a {@code NormContinuous} rescaling of
 * the source column.
 *
 * <p>The rescaling is defined by two linear-norm points,
 * {@code (mean - stdDev * cutoff, -cutoff)} and {@code (mean + stdDev * cutoff, cutoff)}, with
 * mean and standard deviation read from {@code config}. Missing values map to {@code 0.0} and
 * outliers are treated as extreme values.
 *
 * @param config
 *            - ColumnConfig for numerical variable
 * @param cutoff
 *            - cutoff of normalization
 * @param normType
 *            - the normalization method; used to generate the name of the derived field
 * @return a single-element list holding the derived field for the variable
 */
protected List<DerivedField> createNumericalDerivedField(ColumnConfig config, double cutoff, ModelNormalizeConf.NormType normType) {
    List<DerivedField> result = new ArrayList<DerivedField>();

    double mean = config.getMean();
    double stdDev = config.getStdDev();

    // The two anchor points cap the normalized value at +/- cutoff.
    LinearNorm lowerBound = new LinearNorm().setOrig(mean - stdDev * cutoff).setNorm(-cutoff);
    LinearNorm upperBound = new LinearNorm().setOrig(mean + stdDev * cutoff).setNorm(cutoff);

    NormContinuous zscore = new NormContinuous();
    zscore.setField(FieldName.create(NormalUtils.getSimpleColumnName(config, columnConfigList, segmentExpansions, datasetHeaders)));
    zscore.addLinearNorms(lowerBound, upperBound);
    zscore.setMapMissingTo(0.0);
    zscore.setOutliers(OutlierTreatmentMethod.AS_EXTREME_VALUES);

    // The output name is generated from the simple column name and the requested norm type.
    result.add(
        new DerivedField(OpType.CONTINUOUS, DataType.DOUBLE)
            .setName(FieldName.create(genPmmlColumnName(NormalUtils.getSimpleColumnName(config.getColumnName()), normType)))
            .setExpression(zscore));

    return result;
}
Also used : NormContinuous(org.dmg.pmml.NormContinuous) LinearNorm(org.dmg.pmml.LinearNorm) ArrayList(java.util.ArrayList) DerivedField(org.dmg.pmml.DerivedField)

Aggregations

DerivedField (org.dmg.pmml.DerivedField): 27, ArrayList (java.util.ArrayList): 16, Feature (org.jpmml.converter.Feature): 14, ContinuousFeature (org.jpmml.converter.ContinuousFeature): 13, FieldName (org.dmg.pmml.FieldName): 10, Apply (org.dmg.pmml.Apply): 9, Expression (org.dmg.pmml.Expression): 8, NormContinuous (org.dmg.pmml.NormContinuous): 6, CategoricalFeature (org.jpmml.converter.CategoricalFeature): 5, DataField (org.dmg.pmml.DataField): 4, Discretize (org.dmg.pmml.Discretize): 4, MapValues (org.dmg.pmml.MapValues): 4, Vector (org.apache.spark.ml.linalg.Vector): 3, FieldRef (org.dmg.pmml.FieldRef): 3, LocalTransformations (org.dmg.pmml.LocalTransformations): 3, HashMap (java.util.HashMap): 2, List (java.util.List): 2, Map (java.util.Map): 2, Constant (org.dmg.pmml.Constant): 2, DiscretizeBin (org.dmg.pmml.DiscretizeBin): 2