use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.
the class StopWordsRemoverConverter method encodeFeatures.
/**
 * Registers the transformer's stop words on the document feature bound to the input column.
 *
 * <p>Each stop word is split with the document feature's word separator pattern.
 * Stop words that split into more than one token are ignored; the remaining ones
 * are rejected if {@code TermUtil.hasPunctuation} reports punctuation, and are
 * otherwise added to a new stop word set that is attached to the document feature.
 *
 * @param encoder the encoder from which the single input feature is looked up
 * @return a singleton list holding the (updated) document feature
 * @throws IllegalArgumentException if a single-token stop word contains punctuation;
 *         the message is the offending stop word
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    StopWordsRemover transformer = getTransformer();

    DocumentFeature documentFeature = (DocumentFeature) encoder.getOnlyFeature(transformer.getInputCol());

    Pattern wordSeparator = Pattern.compile(documentFeature.getWordSeparatorRE());

    DocumentFeature.StopWordSet stopWordSet = new DocumentFeature.StopWordSet(transformer.getCaseSensitive());

    for (String candidate : transformer.getStopWords()) {
        int tokenCount = wordSeparator.split(candidate).length;

        // Only single-token stop words are kept; multi-token ones are skipped.
        // See https://issues.apache.org/jira/browse/SPARK-18374
        if (tokenCount <= 1) {
            if (TermUtil.hasPunctuation(candidate)) {
                throw new IllegalArgumentException(candidate);
            }

            stopWordSet.add(candidate);
        }
    }

    documentFeature.addStopWordSet(stopWordSet);

    return Collections.<Feature>singletonList(documentFeature);
}
use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.
the class VectorAssemblerConverter method encodeFeatures.
/**
 * Concatenates the features of all input columns into one list.
 *
 * <p>Columns are visited in the order returned by {@code getInputCols()}, and the
 * features of each column are appended in the order the encoder returns them.
 *
 * @param encoder the encoder from which each input column's features are looked up
 * @return a new list holding the features of every input column, in column order
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    VectorAssembler transformer = getTransformer();

    List<Feature> assembled = new ArrayList<>();

    String[] columnNames = transformer.getInputCols();
    for (int idx = 0; idx < columnNames.length; idx++) {
        assembled.addAll(encoder.getFeatures(columnNames[idx]));
    }

    return assembled;
}
use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.
the class VectorIndexerModelConverter method encodeFeatures.
/**
 * Encodes the indexed vector column, feature by feature.
 *
 * <p>For every position that has an entry in the transformer's category maps, the
 * input feature is converted to categorical on the encoder and a derived field is
 * created that maps each original category value to its index through an inline
 * lookup table. Positions without a category map are passed through as continuous
 * features.
 *
 * @param encoder the encoder used to look up input features and to create derived fields
 * @return a new list with one feature per input position
 * @throws IllegalArgumentException if the number of input features differs from
 *         the transformer's {@code numFeatures()}
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    VectorIndexerModel transformer = getTransformer();

    List<Feature> inputFeatures = encoder.getFeatures(transformer.getInputCol());

    int expectedCount = transformer.numFeatures();
    int actualCount = inputFeatures.size();
    if (expectedCount != actualCount) {
        throw new IllegalArgumentException("Expected " + expectedCount + " features, got " + actualCount + " features");
    }

    Map<Integer, Map<Double, Integer>> categoryMaps = transformer.javaCategoryMaps();

    List<Feature> encoded = new ArrayList<>();

    for (int position = 0; position < expectedCount; position++) {
        Feature feature = inputFeatures.get(position);

        Map<Double, Integer> categoryMap = categoryMaps.get(position);
        if (categoryMap == null) {
            // No category map for this position: keep the feature as continuous.
            encoded.add((ContinuousFeature) feature);

            continue;
        }

        List<String> inputCategories = new ArrayList<>();
        List<String> outputValues = new ArrayList<>();

        DocumentBuilder documentBuilder = DOMUtil.createDocumentBuilder();
        InlineTable lookupTable = new InlineTable();

        String inputColumn = "input";
        String outputColumn = "output";
        List<String> columns = Arrays.asList(inputColumn, outputColumn);

        // Sort the entries with the converter's comparator before emitting rows.
        List<Map.Entry<Double, Integer>> sortedEntries = new ArrayList<>(categoryMap.entrySet());
        Collections.sort(sortedEntries, VectorIndexerModelConverter.COMPARATOR);

        for (Map.Entry<Double, Integer> mapping : sortedEntries) {
            String category = ValueUtil.formatValue(mapping.getKey());
            String value = ValueUtil.formatValue(mapping.getValue());

            inputCategories.add(category);
            outputValues.add(value);

            lookupTable.addRows(DOMUtil.createRow(documentBuilder, columns, Arrays.asList(category, value)));
        }

        encoder.toCategorical(feature.getName(), inputCategories);

        FieldColumnPair fieldColumnPair = new FieldColumnPair(feature.getName(), inputColumn);

        MapValues mapValues = new MapValues()
            .addFieldColumnPairs(fieldColumnPair)
            .setOutputColumn(outputColumn)
            .setInlineTable(lookupTable);

        DerivedField derivedField = encoder.createDerivedField(formatName(transformer, position), OpType.CATEGORICAL, DataType.INTEGER, mapValues);

        encoded.add(new CategoricalFeature(encoder, derivedField, outputValues));
    }

    return encoded;
}
use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.
the class ImputerModelConverter method registerFeatures.
/**
 * Registers each encoded feature on the encoder under its matching output column.
 *
 * <p>The features produced by {@code encodeFeatures(encoder)} are paired by position
 * with the transformer's output columns; each output column is bound to a
 * singleton list holding its feature.
 *
 * @param encoder the encoder that receives the output column to feature bindings
 * @throws IllegalArgumentException if the number of encoded features differs from
 *         the number of output columns
 */
@Override
public void registerFeatures(SparkMLEncoder encoder) {
    ImputerModel transformer = getTransformer();

    List<Feature> features = encodeFeatures(encoder);

    String[] outputCols = transformer.getOutputCols();
    if (outputCols.length != features.size()) {
        // Report both counts so a mismatch can be diagnosed from the message alone.
        throw new IllegalArgumentException("Expected " + outputCols.length + " features, got " + features.size() + " features");
    }

    for (int i = 0; i < features.size(); i++) {
        String outputCol = outputCols[i];
        Feature feature = features.get(i);

        encoder.putFeatures(outputCol, Collections.singletonList(feature));
    }
}
use of org.jpmml.converter.Feature in project jpmml-sparkml by jpmml.
the class InteractionConverter method encodeFeatures.
/**
 * Builds the pairwise interaction features across all input columns.
 *
 * <p>The features of the first input column form the initial result. For every
 * further column, each feature of the running result is combined with each
 * feature of that column into an {@code InteractionFeature} of type double. The
 * generated field names consist of the input column names seen so far joined by
 * {@code ":"}, followed by a zero-based index in square brackets that counts the
 * combinations produced for the current column.
 *
 * @param encoder the encoder from which each input column's features are looked up
 * @return an empty list when there are no input columns; the list returned by the
 *         encoder when there is exactly one; otherwise a new list of interaction features
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    Interaction transformer = getTransformer();

    List<Feature> result = new ArrayList<>();

    String[] inputCols = transformer.getInputCols();
    if (inputCols.length == 0) {
        return result;
    }

    // The first column seeds both the running name and the running feature list.
    String name = inputCols[0];
    result = encoder.getFeatures(name);

    for (int col = 1; col < inputCols.length; col++) {
        String inputCol = inputCols[col];

        List<Feature> rightFeatures = encoder.getFeatures(inputCol);

        name = name + (":" + inputCol);

        List<Feature> products = new ArrayList<>();

        for (Feature left : result) {
            for (Feature right : rightFeatures) {
                // The list size equals the number of combinations emitted so far for this column.
                FieldName productName = FieldName.create(name + "[" + products.size() + "]");

                products.add(new InteractionFeature(encoder, productName, DataType.DOUBLE, Arrays.asList(left, right)));
            }
        }

        result = products;
    }

    return result;
}
Aggregations