Search in sources :

Example 1 with IndexFeature

use of org.jpmml.converter.IndexFeature in project jpmml-sparkml by jpmml.

the class BinarizerConverter method encodeFeatures.

/**
 * Encodes one two-valued indicator feature per input column of the Binarizer.
 *
 * <p>For every input column a derived field is registered whose expression evaluates to
 * {@code 0} when the input is less than or equal to the transformer's threshold and to
 * {@code 1} otherwise. The field is wrapped as an {@code IndexFeature} over the
 * categories {@code [0.0, 1.0]}.</p>
 *
 * @param encoder the encoder that supplies the input features and registers the derived fields
 * @return the indicator features, in input column order
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Binarizer binarizer = getTransformer();
    Double cutoff = binarizer.getThreshold();
    InOutMode mode = getInputMode();
    List<Feature> indicators = new ArrayList<>();
    String[] columns = mode.getInputCols(binarizer);
    for (int pos = 0; pos < columns.length; pos++) {
        ContinuousFeature input = encoder.getOnlyFeature(columns[pos]).toContinuousFeature();

        // Condition: input <= threshold
        Apply atOrBelowCutoff = PMMLUtil.createApply(PMMLFunctions.LESSOREQUAL, input.ref(), PMMLUtil.createConstant(cutoff));

        // if(condition, 0, 1)
        Apply indicator = new Apply(PMMLFunctions.IF);
        indicator.addExpressions(atOrBelowCutoff);
        indicator.addExpressions(PMMLUtil.createConstant(0d), PMMLUtil.createConstant(1d));

        DerivedField indicatorField = encoder.createDerivedField(formatName(binarizer, pos), OpType.CATEGORICAL, DataType.DOUBLE, indicator);
        indicators.add(new IndexFeature(encoder, indicatorField, Arrays.asList(0d, 1d)));
    }
    return indicators;
}
Also used : IndexFeature(org.jpmml.converter.IndexFeature) Apply(org.dmg.pmml.Apply) ArrayList(java.util.ArrayList) IndexFeature(org.jpmml.converter.IndexFeature) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Binarizer(org.apache.spark.ml.feature.Binarizer) DerivedField(org.dmg.pmml.DerivedField)

Example 2 with IndexFeature

use of org.jpmml.converter.IndexFeature in project jpmml-sparkml by jpmml.

the class ClusteringModelConverter method registerOutputFields.

/**
 * Registers the prediction column of a clustering model with the encoder.
 *
 * <p>Two derived output fields are created on {@code pmmlModel}: a non-final, string-typed
 * predicted field whose name is built from {@code "pmml"} and the prediction column, and an
 * integer-typed transformed-value field named after the prediction column that references
 * the first one. The second field is then published under the prediction column as an
 * {@code IndexFeature} over the cluster categories.</p>
 *
 * @param label the model label; not read by this method
 * @param pmmlModel the PMML model that receives the output fields
 * @param encoder the encoder that creates the derived fields and stores the feature
 * @return always an empty list
 */
@Override
public List<OutputField> registerOutputFields(Label label, org.dmg.pmml.Model pmmlModel, SparkMLEncoder encoder) {
    T clusteringModel = getTransformer();
    List<Integer> clusterIds = LabelUtil.createTargetCategories(getNumberOfClusters());
    String predictionCol = clusteringModel.getPredictionCol();

    // Intermediate (non-final) predicted value, typed as string.
    OutputField rawOutput = ModelUtil.createPredictedField(FieldNameUtil.create("pmml", predictionCol), OpType.CATEGORICAL, DataType.STRING);
    rawOutput.setFinalResult(false);
    DerivedOutputField rawField = encoder.createDerivedField(pmmlModel, rawOutput, true);

    // Integer-typed view of the intermediate value, exposed under the Spark column name.
    FieldRef rawRef = new FieldRef(rawField.getName());
    OutputField indexOutput = new OutputField(FieldName.create(predictionCol), OpType.CATEGORICAL, DataType.INTEGER);
    indexOutput.setResultFeature(ResultFeature.TRANSFORMED_VALUE);
    indexOutput.setExpression(rawRef);
    DerivedOutputField indexField = encoder.createDerivedField(pmmlModel, indexOutput, true);

    encoder.putOnlyFeature(predictionCol, new IndexFeature(encoder, indexField, clusterIds));
    return Collections.emptyList();
}
Also used : IndexFeature(org.jpmml.converter.IndexFeature) FieldRef(org.dmg.pmml.FieldRef) DerivedOutputField(org.jpmml.converter.DerivedOutputField) OutputField(org.dmg.pmml.OutputField) DerivedOutputField(org.jpmml.converter.DerivedOutputField)

Example 3 with IndexFeature

use of org.jpmml.converter.IndexFeature in project jpmml-sparkml by jpmml.

the class ClassificationModelConverter method registerOutputFields.

/**
 * Registers the prediction column, and the probability column when the model has one,
 * of a classification model with the encoder.
 *
 * <p>The prediction is exposed through two derived output fields: a non-final predicted
 * field carrying the label's own data type, and a double-typed transformed-value field
 * that maps each label value to its category index via a {@code MapValues} expression.
 * The latter is published under the prediction column as an {@code IndexFeature}.</p>
 *
 * <p>If the model implements {@code HasProbabilityCol}, one probability output field per
 * label value is created; these are returned and also published under the probability
 * column as continuous features.</p>
 *
 * @param label the model label; must be a {@code CategoricalLabel}
 * @param pmmlModel the PMML model that receives the derived output fields
 * @param encoder the encoder that creates the derived fields and stores the features
 * @return the probability output fields, or an empty list when the model has no probability column
 */
@Override
public List<OutputField> registerOutputFields(Label label, Model pmmlModel, SparkMLEncoder encoder) {
    T classifier = getTransformer();
    CategoricalLabel categoricalLabel = (CategoricalLabel) label;
    List<Integer> classIndices = LabelUtil.createTargetCategories(categoricalLabel.size());
    String predictionCol = classifier.getPredictionCol();
    Boolean keepPredictionCol = (Boolean) getOption(HasPredictionModelOptions.OPTION_KEEP_PREDICTIONCOL, Boolean.TRUE);

    // Intermediate (non-final) predicted value in the label's own data type.
    OutputField rawOutput = ModelUtil.createPredictedField(FieldNameUtil.create("pmml", predictionCol), OpType.CATEGORICAL, categoricalLabel.getDataType());
    rawOutput.setFinalResult(false);
    DerivedOutputField rawField = encoder.createDerivedField(pmmlModel, rawOutput, keepPredictionCol);

    // Label value -> category index, emitted as a double.
    MapValues labelToIndex = PMMLUtil.createMapValues(rawField.getName(), categoricalLabel.getValues(), classIndices);
    labelToIndex.setDataType(DataType.DOUBLE);

    OutputField indexOutput = new OutputField(FieldName.create(predictionCol), OpType.CONTINUOUS, DataType.DOUBLE);
    indexOutput.setResultFeature(ResultFeature.TRANSFORMED_VALUE);
    indexOutput.setExpression(labelToIndex);
    DerivedOutputField indexField = encoder.createDerivedField(pmmlModel, indexOutput, keepPredictionCol);
    encoder.putOnlyFeature(predictionCol, new IndexFeature(encoder, indexField, classIndices));

    List<OutputField> probabilityOutputs = new ArrayList<>();
    if (classifier instanceof HasProbabilityCol) {
        String probabilityCol = ((HasProbabilityCol) classifier).getProbabilityCol();
        List<Feature> probabilityFeatures = new ArrayList<>();
        int classCount = categoricalLabel.size();
        for (int k = 0; k < classCount; k++) {
            Object classValue = categoricalLabel.getValue(k);
            OutputField probabilityOutput = ModelUtil.createProbabilityField(FieldNameUtil.create(probabilityCol, classValue), DataType.DOUBLE, classValue);
            probabilityOutputs.add(probabilityOutput);
            probabilityFeatures.add(new ContinuousFeature(encoder, probabilityOutput));
        }
        // XXX
        encoder.putFeatures(probabilityCol, probabilityFeatures);
    }
    return probabilityOutputs;
}
Also used : IndexFeature(org.jpmml.converter.IndexFeature) HasProbabilityCol(org.apache.spark.ml.param.shared.HasProbabilityCol) ArrayList(java.util.ArrayList) IndexFeature(org.jpmml.converter.IndexFeature) ResultFeature(org.dmg.pmml.ResultFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) DerivedOutputField(org.jpmml.converter.DerivedOutputField) MapValues(org.dmg.pmml.MapValues) CategoricalLabel(org.jpmml.converter.CategoricalLabel) OutputField(org.dmg.pmml.OutputField) DerivedOutputField(org.jpmml.converter.DerivedOutputField)

Example 4 with IndexFeature

use of org.jpmml.converter.IndexFeature in project jpmml-sparkml by jpmml.

the class ModelConverter method getLabel.

/**
 * Derives the PMML label of the model from its label column.
 *
 * <p>When the model implements {@code HasLabelCol}, the label column's feature is looked up
 * and interpreted according to the mining function:</p>
 * <ul>
 *   <li>Classification: boolean and categorical features are turned into a categorical
 *       label directly. A continuous feature is first converted to a categorical field over
 *       the target categories and re-registered under the label column as an
 *       {@code IndexFeature}. Any other feature kind is rejected.</li>
 *   <li>Regression: the feature's field is converted to continuous, forced to the double
 *       data type, and wrapped in a continuous label.</li>
 * </ul>
 *
 * <p>For classification models, the label size is finally checked against the model's
 * number of classes.</p>
 *
 * @param encoder the encoder holding the label column's feature
 * @return the label, or {@code null} when the model does not implement {@code HasLabelCol}
 * @throws IllegalArgumentException if the label feature kind or the mining function is not supported
 */
public Label getLabel(SparkMLEncoder encoder) {
    T model = getTransformer();
    Label resolved = null;
    if (model instanceof HasLabelCol) {
        String labelCol = ((HasLabelCol) model).getLabelCol();
        Feature labelFeature = encoder.getOnlyFeature(labelCol);
        MiningFunction function = getMiningFunction();
        switch (function) {
            case CLASSIFICATION:
                if (labelFeature instanceof BooleanFeature) {
                    BooleanFeature booleanLabel = (BooleanFeature) labelFeature;
                    resolved = new CategoricalLabel(booleanLabel.getName(), booleanLabel.getDataType(), booleanLabel.getValues());
                } else if (labelFeature instanceof CategoricalFeature) {
                    CategoricalFeature categoricalLabelFeature = (CategoricalFeature) labelFeature;
                    DataField labelDataField = (DataField) categoricalLabelFeature.getField();
                    resolved = new CategoricalLabel(labelDataField);
                } else if (labelFeature instanceof ContinuousFeature) {
                    ContinuousFeature numericLabel = (ContinuousFeature) labelFeature;
                    // Two classes are assumed unless the model reports its own class count.
                    int classCount = (model instanceof ClassificationModel)
                        ? ((ClassificationModel<?, ?>) model).numClasses()
                        : 2;
                    List<Integer> classIndices = LabelUtil.createTargetCategories(classCount);
                    Field<?> categoricalField = encoder.toCategorical(numericLabel.getName(), classIndices);
                    encoder.putOnlyFeature(labelCol, new IndexFeature(encoder, categoricalField, classIndices));
                    resolved = new CategoricalLabel(categoricalField.getName(), categoricalField.getDataType(), classIndices);
                } else {
                    throw new IllegalArgumentException("Expected a categorical or categorical-like continuous feature, got " + labelFeature);
                }
                break;
            case REGRESSION:
                Field<?> continuousField = encoder.toContinuous(labelFeature.getName());
                continuousField.setDataType(DataType.DOUBLE);
                resolved = new ContinuousLabel(continuousField.getName(), continuousField.getDataType());
                break;
            default:
                throw new IllegalArgumentException("Mining function " + function + " is not supported");
        }
    }
    if (model instanceof ClassificationModel) {
        int expectedClasses = ((ClassificationModel<?, ?>) model).numClasses();
        SchemaUtil.checkSize(expectedClasses, (CategoricalLabel) resolved);
    }
    return resolved;
}
Also used : IndexFeature(org.jpmml.converter.IndexFeature) ContinuousLabel(org.jpmml.converter.ContinuousLabel) CategoricalLabel(org.jpmml.converter.CategoricalLabel) Label(org.jpmml.converter.Label) IndexFeature(org.jpmml.converter.IndexFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Feature(org.jpmml.converter.Feature) BooleanFeature(org.jpmml.converter.BooleanFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) BooleanFeature(org.jpmml.converter.BooleanFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) HasLabelCol(org.apache.spark.ml.param.shared.HasLabelCol) OutputField(org.dmg.pmml.OutputField) Field(org.dmg.pmml.Field) DataField(org.dmg.pmml.DataField) ContinuousFeature(org.jpmml.converter.ContinuousFeature) DataField(org.dmg.pmml.DataField) CategoricalLabel(org.jpmml.converter.CategoricalLabel) List(java.util.List) MiningFunction(org.dmg.pmml.MiningFunction) ClassificationModel(org.apache.spark.ml.classification.ClassificationModel) ContinuousLabel(org.jpmml.converter.ContinuousLabel)

Example 5 with IndexFeature

use of org.jpmml.converter.IndexFeature in project jpmml-sparkml by jpmml.

the class BucketizerConverter method encodeFeatures.

/**
 * Encodes one bucket-index feature per input column of the Bucketizer.
 *
 * <p>In single-column mode the transformer's splits are used for the only column; in
 * multi-column mode each column uses its own row of the splits array. For {@code n} split
 * points, {@code n - 1} bins indexed {@code 0 .. n - 2} are produced. Every bin is
 * closed-open except the last one, which is closed on both sides. The resulting
 * integer-typed derived field is wrapped as an {@code IndexFeature} over the bin indices.</p>
 *
 * @param encoder the encoder that supplies the input features and registers the derived fields
 * @return the bucket-index features, in input column order
 * @throws IllegalArgumentException if the input mode is neither single nor multiple
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Bucketizer bucketizer = getTransformer();
    InOutMode mode = getInputMode();

    boolean singleColumn = (InOutMode.SINGLE).equals(mode);
    if (!singleColumn && !(InOutMode.MULTIPLE).equals(mode)) {
        throw new IllegalArgumentException();
    }
    String[] columns = mode.getInputCols(bucketizer);
    double[][] splitsPerColumn = singleColumn
        ? new double[][] { bucketizer.getSplits() }
        : bucketizer.getSplitsArray();

    List<Feature> bucketFeatures = new ArrayList<>();
    for (int col = 0; col < columns.length; col++) {
        double[] boundaries = splitsPerColumn[col];
        ContinuousFeature input = encoder.getOnlyFeature(columns[col]).toContinuousFeature();

        Discretize discretize = new Discretize(input.getName());
        discretize.setDataType(DataType.INTEGER);

        List<Integer> binIndices = new ArrayList<>();
        int binCount = boundaries.length - 1;
        for (int bin = 0; bin < binCount; bin++) {
            Integer binIndex = bin;
            binIndices.add(binIndex);

            // Only the last bin includes its right margin.
            Interval.Closure closure;
            if (bin < (binCount - 1)) {
                closure = Interval.Closure.CLOSED_OPEN;
            } else {
                closure = Interval.Closure.CLOSED_CLOSED;
            }
            Interval range = new Interval(closure);
            range.setLeftMargin(formatMargin(boundaries[bin]));
            range.setRightMargin(formatMargin(boundaries[bin + 1]));

            discretize.addDiscretizeBins(new DiscretizeBin(binIndex, range));
        }

        DerivedField bucketField = encoder.createDerivedField(formatName(bucketizer, col), OpType.CATEGORICAL, DataType.INTEGER, discretize);
        bucketFeatures.add(new IndexFeature(encoder, bucketField, binIndices));
    }
    return bucketFeatures;
}
Also used : IndexFeature(org.jpmml.converter.IndexFeature) ArrayList(java.util.ArrayList) IndexFeature(org.jpmml.converter.IndexFeature) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) Discretize(org.dmg.pmml.Discretize) Bucketizer(org.apache.spark.ml.feature.Bucketizer) DiscretizeBin(org.dmg.pmml.DiscretizeBin) DerivedField(org.dmg.pmml.DerivedField) Interval(org.dmg.pmml.Interval)

Aggregations

IndexFeature (org.jpmml.converter.IndexFeature)5 ContinuousFeature (org.jpmml.converter.ContinuousFeature)4 Feature (org.jpmml.converter.Feature)4 ArrayList (java.util.ArrayList)3 OutputField (org.dmg.pmml.OutputField)3 DerivedField (org.dmg.pmml.DerivedField)2 CategoricalLabel (org.jpmml.converter.CategoricalLabel)2 DerivedOutputField (org.jpmml.converter.DerivedOutputField)2 List (java.util.List)1 ClassificationModel (org.apache.spark.ml.classification.ClassificationModel)1 Binarizer (org.apache.spark.ml.feature.Binarizer)1 Bucketizer (org.apache.spark.ml.feature.Bucketizer)1 HasLabelCol (org.apache.spark.ml.param.shared.HasLabelCol)1 HasProbabilityCol (org.apache.spark.ml.param.shared.HasProbabilityCol)1 Apply (org.dmg.pmml.Apply)1 DataField (org.dmg.pmml.DataField)1 Discretize (org.dmg.pmml.Discretize)1 DiscretizeBin (org.dmg.pmml.DiscretizeBin)1 Field (org.dmg.pmml.Field)1 FieldRef (org.dmg.pmml.FieldRef)1