Search in sources :

Example 16 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class StopWordsRemoverConverter method encodeFeatures.

/**
 * Attaches the transformer's stop words to the document feature of the input column.
 *
 * <p>Each stop word is split with the document feature's word separator pattern.
 * Stop words that split into more than one token are silently ignored; the
 * remaining ones are collected into a {@code DocumentFeature.StopWordSet} that is
 * created with the transformer's case sensitivity setting.</p>
 *
 * @param encoder the encoder that supplies the (single) feature of the input column
 * @return a singleton list holding the updated document feature
 * @throws IllegalArgumentException if a single-token stop word is reported by
 *     {@code TermUtil.hasPunctuation} as containing punctuation
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    StopWordsRemover remover = getTransformer();
    DocumentFeature document = (DocumentFeature) encoder.getOnlyFeature(remover.getInputCol());
    Pattern separator = Pattern.compile(document.getWordSeparatorRE());
    DocumentFeature.StopWordSet acceptedWords = new DocumentFeature.StopWordSet(remover.getCaseSensitive());
    for (String candidate : remover.getStopWords()) {
        // Only single-token stopwords are kept. See https://issues.apache.org/jira/browse/SPARK-18374
        boolean singleToken = separator.split(candidate).length <= 1;
        if (singleToken) {
            if (TermUtil.hasPunctuation(candidate)) {
                throw new IllegalArgumentException(candidate);
            }
            acceptedWords.add(candidate);
        }
    }
    document.addStopWordSet(acceptedWords);
    List<Feature> result = Collections.<Feature>singletonList(document);
    return result;
}
Also used : Pattern(java.util.regex.Pattern) StopWordsRemover(org.apache.spark.ml.feature.StopWordsRemover) DocumentFeature(org.jpmml.sparkml.DocumentFeature) Feature(org.jpmml.converter.Feature)

Example 17 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class VectorAssemblerConverter method encodeFeatures.

/**
 * Concatenates the features of every input column, in input column order.
 *
 * @param encoder the encoder that supplies the features of each input column
 * @return a new list holding the features of all input columns
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    VectorAssembler assembler = getTransformer();
    String[] columnNames = assembler.getInputCols();
    List<Feature> assembled = new ArrayList<>();
    for (int i = 0; i < columnNames.length; i++) {
        assembled.addAll(encoder.getFeatures(columnNames[i]));
    }
    return assembled;
}
Also used : VectorAssembler(org.apache.spark.ml.feature.VectorAssembler) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature)

Example 18 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class VectorIndexerModelConverter method encodeFeatures.

/**
 * Encodes the output features of a vector indexer model.
 *
 * <p>For every input feature index that has an entry in the transformer's category
 * maps, the input field is converted to categorical and a categorical
 * {@code INTEGER} derived field is created that maps each formatted category key
 * to its formatted index through a {@code MapValues} inline table. Input features
 * without a category map are passed through after a cast to
 * {@code ContinuousFeature}.</p>
 *
 * @param encoder the encoder that supplies input features and receives derived fields
 * @return one output feature per input feature, in the same order
 * @throws IllegalArgumentException if the number of input features differs from
 *     the number of features reported by the transformer
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    VectorIndexerModel transformer = getTransformer();
    List<Feature> inputFeatures = encoder.getFeatures(transformer.getInputCol());
    int expectedCount = transformer.numFeatures();
    if (expectedCount != inputFeatures.size()) {
        throw new IllegalArgumentException("Expected " + expectedCount + " features, got " + inputFeatures.size() + " features");
    }
    Map<Integer, Map<Double, Integer>> categoryMaps = transformer.javaCategoryMaps();
    List<Feature> result = new ArrayList<>();
    for (int i = 0; i < expectedCount; i++) {
        Feature inputFeature = inputFeatures.get(i);
        Map<Double, Integer> categoryMap = categoryMaps.get(i);
        // No category map for this index: the feature is kept as a continuous one.
        if (categoryMap == null) {
            result.add((ContinuousFeature) inputFeature);
            continue;
        }
        List<String> inputValues = new ArrayList<>();
        List<String> outputValues = new ArrayList<>();
        DocumentBuilder documentBuilder = DOMUtil.createDocumentBuilder();
        InlineTable inlineTable = new InlineTable();
        List<String> columns = Arrays.asList("input", "output");
        // Rows are emitted in the order defined by the converter's comparator.
        List<Map.Entry<Double, Integer>> sortedEntries = new ArrayList<>(categoryMap.entrySet());
        Collections.sort(sortedEntries, VectorIndexerModelConverter.COMPARATOR);
        for (Map.Entry<Double, Integer> entry : sortedEntries) {
            String inputValue = ValueUtil.formatValue(entry.getKey());
            String outputValue = ValueUtil.formatValue(entry.getValue());
            inputValues.add(inputValue);
            outputValues.add(outputValue);
            inlineTable.addRows(DOMUtil.createRow(documentBuilder, columns, Arrays.asList(inputValue, outputValue)));
        }
        encoder.toCategorical(inputFeature.getName(), inputValues);
        String inputColumn = columns.get(0);
        String outputColumn = columns.get(1);
        MapValues mapValues = new MapValues()
            .addFieldColumnPairs(new FieldColumnPair(inputFeature.getName(), inputColumn))
            .setOutputColumn(outputColumn)
            .setInlineTable(inlineTable);
        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i), OpType.CATEGORICAL, DataType.INTEGER, mapValues);
        result.add(new CategoricalFeature(encoder, derivedField, outputValues));
    }
    return result;
}
Also used : InlineTable(org.dmg.pmml.InlineTable) ArrayList(java.util.ArrayList) FieldColumnPair(org.dmg.pmml.FieldColumnPair) Feature(org.jpmml.converter.Feature) ContinuousFeature(org.jpmml.converter.ContinuousFeature) CategoricalFeature(org.jpmml.converter.CategoricalFeature) DocumentBuilder(javax.xml.parsers.DocumentBuilder) MapValues(org.dmg.pmml.MapValues) VectorIndexerModel(org.apache.spark.ml.feature.VectorIndexerModel) Row(org.dmg.pmml.Row) Map(java.util.Map) DerivedField(org.dmg.pmml.DerivedField)

Example 19 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class ImputerModelConverter method registerFeatures.

/**
 * Registers each encoded feature with the encoder under its own output column.
 *
 * <p>The features produced by {@code encodeFeatures} are paired positionally with
 * the transformer's output columns; every output column receives a singleton
 * list holding the feature at the same index.</p>
 *
 * @param encoder the encoder that the features are encoded with and registered on
 * @throws IllegalArgumentException if the number of output columns differs from
 *     the number of encoded features
 */
@Override
public void registerFeatures(SparkMLEncoder encoder) {
    ImputerModel transformer = getTransformer();
    List<Feature> features = encodeFeatures(encoder);
    String[] outputCols = transformer.getOutputCols();
    if (outputCols.length != features.size()) {
        // Report both counts so that a mismatch can be diagnosed from the stack trace alone.
        throw new IllegalArgumentException("Expected " + outputCols.length + " features, got " + features.size() + " features");
    }
    for (int i = 0; i < features.size(); i++) {
        String outputCol = outputCols[i];
        Feature feature = features.get(i);
        encoder.putFeatures(outputCol, Collections.singletonList(feature));
    }
}
Also used : ImputerModel(org.apache.spark.ml.feature.ImputerModel) Feature(org.jpmml.converter.Feature)

Example 20 with Feature

use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.

the class InteractionConverter method encodeFeatures.

/**
 * Encodes the pairwise interaction of the features of all input columns.
 *
 * <p>The features of the first input column are taken as-is. For each further
 * input column, every feature accumulated so far is combined with every feature
 * of that column into an {@code InteractionFeature} of type {@code DOUBLE}. The
 * interaction features are named after the colon-joined input column names seen
 * so far, followed by a zero-based running index in square brackets.</p>
 *
 * @param encoder the encoder that supplies the features of each input column
 * @return the accumulated features; an empty list when there are no input columns
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Interaction transformer = getTransformer();
    String[] inputCols = transformer.getInputCols();
    String prefix = "";
    List<Feature> accumulated = new ArrayList<>();
    boolean first = true;
    for (String inputCol : inputCols) {
        List<Feature> current = encoder.getFeatures(inputCol);
        if (first) {
            // The first column seeds both the name prefix and the feature list.
            first = false;
            prefix = inputCol;
            accumulated = current;
            continue;
        }
        prefix = prefix + (":" + inputCol);
        List<Feature> products = new ArrayList<>();
        for (Feature left : accumulated) {
            for (Feature right : current) {
                // The running index equals the number of interaction features created so far.
                FieldName productName = FieldName.create(prefix + "[" + products.size() + "]");
                products.add(new InteractionFeature(encoder, productName, DataType.DOUBLE, Arrays.asList(left, right)));
            }
        }
        accumulated = products;
    }
    return accumulated;
}
Also used : InteractionFeature(org.jpmml.converter.InteractionFeature) Interaction(org.apache.spark.ml.feature.Interaction) ArrayList(java.util.ArrayList) Feature(org.jpmml.converter.Feature)

Aggregations

Feature (org.jpmml.converter.Feature): 53; ContinuousFeature (org.jpmml.converter.ContinuousFeature): 30; ArrayList (java.util.ArrayList): 27; CategoricalFeature (org.jpmml.converter.CategoricalFeature): 19; DerivedField (org.dmg.pmml.DerivedField): 14; DataField (org.dmg.pmml.DataField): 13; FieldName (org.dmg.pmml.FieldName): 10; Apply (org.dmg.pmml.Apply): 9; BooleanFeature (org.jpmml.converter.BooleanFeature): 9; BinaryFeature (org.jpmml.converter.BinaryFeature): 7; List (java.util.List): 6; Expression (org.dmg.pmml.Expression): 6; SimplePredicate (org.dmg.pmml.SimplePredicate): 6; Vector (org.apache.spark.ml.linalg.Vector): 5; Predicate (org.dmg.pmml.Predicate): 5; Node (org.dmg.pmml.tree.Node): 5; DocumentFeature (org.jpmml.sparkml.DocumentFeature): 5; InteractionFeature (org.jpmml.converter.InteractionFeature): 4; DocumentBuilder (javax.xml.parsers.DocumentBuilder): 3; Transformer (org.apache.spark.ml.Transformer): 3