Search in sources :

Example 21 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class OneHotEncoderModelConverter method encodeFeatures.

/**
 * Expands every input column of the one-hot encoder into binary indicator features.
 *
 * <p>For each input column, the column's only feature is cast to a {@link CategoricalFeature},
 * one {@link BinaryFeature} is created per retained category value, and the indicators are
 * wrapped in a single {@link BinarizedCategoricalFeature}. When the transformer's
 * {@code dropLast} flag is set, the last category value gets no indicator.</p>
 *
 * @param encoder the encoder from which the features of the input columns are looked up
 * @return one binarized categorical feature per input column, in input column order
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    OneHotEncoderModel oneHotEncoder = getTransformer();
    String[] inputCols = oneHotEncoder.getInputCols();
    boolean dropLast = oneHotEncoder.getDropLast();
    List<Feature> encodedFeatures = new ArrayList<>();
    for (String inputCol : inputCols) {
        CategoricalFeature source = (CategoricalFeature) encoder.getOnlyFeature(inputCol);
        List<String> categories = source.getValues();
        // With dropLast the final category is represented implicitly (all indicators off).
        List<String> retained = dropLast ? categories.subList(0, categories.size() - 1) : categories;
        List<BinaryFeature> indicators = new ArrayList<>();
        for (String category : retained) {
            BinaryFeature indicator = new BinaryFeature(encoder, source.getName(), DataType.STRING, category);
            indicators.add(indicator);
        }
        BinarizedCategoricalFeature binarized = new BinarizedCategoricalFeature(encoder, source.getName(), source.getDataType(), indicators);
        encodedFeatures.add(binarized);
    }
    return encodedFeatures;
}
Also used : ArrayList(java.util.ArrayList) BinarizedCategoricalFeature(org.jpmml.sparkml.BinarizedCategoricalFeature) BinaryFeature(org.jpmml.converter.BinaryFeature) Feature(org.jpmml.converter.Feature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) OneHotEncoderModel(org.apache.spark.ml.feature.OneHotEncoderModel)

Example 22 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class RFormulaModelConverter method registerFeatures.

/**
 * Registers the features of an R formula model with the encoder.
 *
 * <p>If the label of the resolved formula differs from the transformer's label column, the
 * features found under the formula label are also stored under the label column. Afterwards,
 * every stage of the nested pipeline model is given a feature converter, which registers its
 * own features in stage order.</p>
 *
 * @param encoder the encoder that receives the feature registrations
 */
@Override
public void registerFeatures(SparkMLEncoder encoder) {
    RFormulaModel formulaModel = getTransformer();
    ResolvedRFormula resolved = formulaModel.resolvedFormula();
    String formulaLabel = resolved.label();
    String labelCol = formulaModel.getLabelCol();
    boolean sameColumn = formulaLabel.equals(labelCol);
    if (!sameColumn) {
        // Make the target features reachable under the label column name as well.
        encoder.putFeatures(labelCol, encoder.getFeatures(formulaLabel));
    }
    for (Transformer stage : formulaModel.pipelineModel().stages()) {
        ConverterUtil.createFeatureConverter(stage).registerFeatures(encoder);
    }
}
Also used : Transformer(org.apache.spark.ml.Transformer) ResolvedRFormula(org.apache.spark.ml.feature.ResolvedRFormula) RFormulaModel(org.apache.spark.ml.feature.RFormulaModel) Feature(org.jpmml.converter.Feature) PipelineModel(org.apache.spark.ml.PipelineModel)

Example 23 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class RegexTokenizerConverter method encodeFeatures.

/**
 * Encodes a regex tokenizer as a single {@link DocumentFeature}.
 *
 * <p>Only tokenizers that run in splitter ("gaps") mode with a minimum token length of 1 are
 * accepted. When lowercasing is enabled, the document feature is built on a derived field
 * that applies the {@code lowercase} function to the input feature; otherwise it is built on
 * the field of the input feature itself.</p>
 *
 * @param encoder the encoder used to look up the input feature and to create derived fields
 * @return a singleton list holding the document feature
 * @throws IllegalArgumentException if the tokenizer is in token matching mode, or its minimum
 *         token length is not 1
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    RegexTokenizer tokenizer = getTransformer();
    boolean splitterMode = tokenizer.getGaps();
    if (!splitterMode) {
        throw new IllegalArgumentException("Expected splitter mode, got token matching mode");
    }
    int minTokenLength = tokenizer.getMinTokenLength();
    if (minTokenLength != 1) {
        throw new IllegalArgumentException("Expected 1 as minimum token length, got " + minTokenLength + " as minimum token length");
    }
    Feature inputFeature = encoder.getOnlyFeature(tokenizer.getInputCol());
    Field<?> documentField = encoder.getField(inputFeature.getName());
    if (tokenizer.getToLowercase()) {
        // Replace the raw field with a derived, lowercased copy of the text.
        Apply lowercaseCall = PMMLUtil.createApply("lowercase", inputFeature.ref());
        documentField = encoder.createDerivedField(FeatureUtil.createName("lowercase", inputFeature), OpType.CATEGORICAL, DataType.STRING, lowercaseCall);
    }
    Feature documentFeature = new DocumentFeature(encoder, documentField, tokenizer.getPattern());
    return Collections.singletonList(documentFeature);
}
Also used : Apply(org.dmg.pmml.Apply) RegexTokenizer(org.apache.spark.ml.feature.RegexTokenizer) DocumentFeature(org.jpmml.sparkml.DocumentFeature) Feature(org.jpmml.converter.Feature)

Example 24 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class BinarizerConverter method encodeFeatures.

/**
 * Encodes a binarizer as a derived categorical field with the values "0" and "1".
 *
 * <p>The derived field evaluates {@code if(lessOrEqual(input, threshold), 0, 1)}: inputs at or
 * below the transformer's threshold yield 0, all other inputs yield 1.</p>
 *
 * @param encoder the encoder used to look up the input feature and to create the derived field
 * @return a singleton list holding the categorical feature backed by the derived field
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Binarizer binarizer = getTransformer();
    Feature inputFeature = encoder.getOnlyFeature(binarizer.getInputCol());
    ContinuousFeature continuousInput = inputFeature.toContinuousFeature();
    // Condition of the "if" expression: input <= threshold.
    Apply atOrBelowThreshold = PMMLUtil.createApply("lessOrEqual", continuousInput.ref(), PMMLUtil.createConstant(binarizer.getThreshold()));
    Apply binarization = new Apply("if")
        .addExpressions(atOrBelowThreshold)
        .addExpressions(PMMLUtil.createConstant(0d), PMMLUtil.createConstant(1d));
    DerivedField binarizedField = encoder.createDerivedField(formatName(binarizer), OpType.CATEGORICAL, DataType.DOUBLE, binarization);
    Feature binarizedFeature = new CategoricalFeature(encoder, binarizedField, Arrays.asList("0", "1"));
    return Collections.singletonList(binarizedFeature);
}
Also used : ContinuousFeature(org.jpmml.converter.ContinuousFeature) Apply(org.dmg.pmml.Apply) Binarizer(org.apache.spark.ml.feature.Binarizer) Feature(org.jpmml.converter.Feature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) DerivedField(org.dmg.pmml.DerivedField)

Example 25 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class ClusteringModelConverter method registerOutputFields.

/**
 * Registers the prediction column of a clustering model as a predicted output field.
 *
 * <p>The output field is a categorical string field named after the model's prediction
 * column. A matching feature is stored in the encoder under the prediction column; that
 * feature refuses conversion to a continuous feature.</p>
 *
 * @param label the model label; not used by this implementation
 * @param encoder the encoder in which the prediction feature is stored
 * @return a singleton list holding the predicted output field
 */
@Override
public List<OutputField> registerOutputFields(Label label, SparkMLEncoder encoder) {
    T clusteringModel = getTransformer();
    String predictionCol = clusteringModel.getPredictionCol();
    OutputField predictionOutput = ModelUtil.createPredictedField(FieldName.create(predictionCol), DataType.STRING, OpType.CATEGORICAL);
    Feature predictionFeature = new Feature(encoder, predictionOutput.getName(), predictionOutput.getDataType()) {

        @Override
        public ContinuousFeature toContinuousFeature() {
            // The prediction feature cannot be converted to a continuous feature.
            throw new UnsupportedOperationException();
        }
    };
    encoder.putOnlyFeature(predictionCol, predictionFeature);
    List<OutputField> outputFields = Collections.singletonList(predictionOutput);
    return outputFields;
}
Also used : OutputField(org.dmg.pmml.OutputField) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature)

Aggregations

Feature (org.jpmml.converter.Feature): 53, ContinuousFeature (org.jpmml.converter.ContinuousFeature): 30, ArrayList (java.util.ArrayList): 27, CategoricalFeature (org.jpmml.converter.CategoricalFeature): 19, DerivedField (org.dmg.pmml.DerivedField): 14, DataField (org.dmg.pmml.DataField): 13, FieldName (org.dmg.pmml.FieldName): 10, Apply (org.dmg.pmml.Apply): 9, BooleanFeature (org.jpmml.converter.BooleanFeature): 9, BinaryFeature (org.jpmml.converter.BinaryFeature): 7, List (java.util.List): 6, Expression (org.dmg.pmml.Expression): 6, SimplePredicate (org.dmg.pmml.SimplePredicate): 6, Vector (org.apache.spark.ml.linalg.Vector): 5, Predicate (org.dmg.pmml.Predicate): 5, Node (org.dmg.pmml.tree.Node): 5, DocumentFeature (org.jpmml.sparkml.DocumentFeature): 5, InteractionFeature (org.jpmml.converter.InteractionFeature): 4, DocumentBuilder (javax.xml.parsers.DocumentBuilder): 3, Transformer (org.apache.spark.ml.Transformer): 3