Use of org.dmg.pmml.FieldColumnPair in the jpmml-sparkml project by jpmml.
Class VectorIndexerModelConverter, method encodeFeatures.
/**
 * Encodes the features of the transformer's input column.
 *
 * <p>For every position that has an entry in the transformer's category maps, the input
 * field is declared categorical and a {@code MapValues} lookup (inline table with the
 * columns {@code "input"} and {@code "output"}) is registered as a categorical, integer
 * derived field. Positions without a category map are passed through after a cast to
 * {@code ContinuousFeature}.
 *
 * @param encoder the encoder that supplies the input features and receives the derived fields
 * @return one feature per input position, in input order
 * @throws IllegalArgumentException if the number of input features differs from
 *         {@code transformer.numFeatures()}
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder) {
    VectorIndexerModel transformer = getTransformer();

    List<Feature> inputFeatures = encoder.getFeatures(transformer.getInputCol());
    int expectedCount = transformer.numFeatures();
    if (expectedCount != inputFeatures.size()) {
        throw new IllegalArgumentException("Expected " + expectedCount + " features, got " + inputFeatures.size() + " features");
    }

    Map<Integer, Map<Double, Integer>> categoryMaps = transformer.javaCategoryMaps();

    List<Feature> encoded = new ArrayList<>();
    for (int index = 0; index < expectedCount; index++) {
        Feature inputFeature = inputFeatures.get(index);
        Map<Double, Integer> indexByCategory = categoryMaps.get(index);

        // No category map for this position: the feature is passed through unchanged.
        if (indexByCategory == null) {
            encoded.add((ContinuousFeature) inputFeature);
            continue;
        }

        List<String> inputCategories = new ArrayList<>();
        List<String> outputValues = new ArrayList<>();

        DocumentBuilder documentBuilder = DOMUtil.createDocumentBuilder();
        InlineTable lookupTable = new InlineTable();
        List<String> columns = Arrays.asList("input", "output");

        // Emit the table rows in the order defined by the class-level comparator.
        List<Map.Entry<Double, Integer>> sortedEntries = new ArrayList<>(indexByCategory.entrySet());
        Collections.sort(sortedEntries, VectorIndexerModelConverter.COMPARATOR);

        for (Map.Entry<Double, Integer> mapEntry : sortedEntries) {
            String inputCategory = ValueUtil.formatValue(mapEntry.getKey());
            inputCategories.add(inputCategory);

            String outputValue = ValueUtil.formatValue(mapEntry.getValue());
            outputValues.add(outputValue);

            lookupTable.addRows(DOMUtil.createRow(documentBuilder, columns, Arrays.asList(inputCategory, outputValue)));
        }

        // The input field must be declared categorical before it is referenced by the lookup.
        encoder.toCategorical(inputFeature.getName(), inputCategories);

        FieldColumnPair inputPair = new FieldColumnPair(inputFeature.getName(), columns.get(0));
        MapValues lookup = new MapValues()
            .addFieldColumnPairs(inputPair)
            .setOutputColumn(columns.get(1))
            .setInlineTable(lookupTable);

        DerivedField indexedField = encoder.createDerivedField(formatName(transformer, index), OpType.CATEGORICAL, DataType.INTEGER, lookup);
        encoded.add(new CategoricalFeature(encoder, indexedField, outputValues));
    }

    return encoded;
}
Use of org.dmg.pmml.FieldColumnPair in the jpmml-r project by jpmml.
Class FormulaUtil, method createMapValues.
/**
 * Builds a {@code MapValues} expression that translates the values of a field through an
 * inline lookup table with the columns {@code "from"} and {@code "to"}.
 *
 * <p>Every category that is not already one of the mapping's output values is added to
 * {@code mapping} as an identity entry (category maps to itself). Note that the supplied
 * {@code mapping} is modified in place, so it must be a mutable map.
 *
 * @param name the field whose values are looked up in the {@code "from"} column
 * @param mapping input value to output value pairs; extended in place with identity entries
 * @param categories the categories to check against the mapping's output values
 * @return the {@code MapValues} expression with one inline table row per mapping entry,
 *         in the iteration order of {@code mapping}
 */
private static MapValues createMapValues(FieldName name, Map<String, String> mapping, List<String> categories) {
    // Snapshot of the output values, taken before the identity entries are added below.
    Set<String> outputs = new LinkedHashSet<>(mapping.values());

    for (String category : categories) {
        // Assume disjoint input and output value spaces
        if (outputs.contains(category)) {
            continue;
        }
        mapping.put(category, category);
    }

    List<String> columns = Arrays.asList("from", "to");
    InlineTable inlineTable = new InlineTable();
    DocumentBuilder documentBuilder = DOMUtil.createDocumentBuilder();

    for (Map.Entry<String, String> entry : mapping.entrySet()) {
        Row row = DOMUtil.createRow(documentBuilder, columns, Arrays.asList(entry.getKey(), entry.getValue()));
        inlineTable.addRows(row);
    }

    return new MapValues()
        .addFieldColumnPairs(new FieldColumnPair(name, columns.get(0)))
        .setOutputColumn(columns.get(1))
        .setInlineTable(inlineTable);
}
Use of org.dmg.pmml.FieldColumnPair in the jpmml-sparkml project by jpmml.
Class ClassificationModelConverter, method registerOutputFields.
/**
 * Registers the output fields of a classification model.
 *
 * <p>Always produces two fields: a predicted field named {@code pmml(<predictionCol>)}
 * carrying the label value, and a transformed-value field named after the prediction
 * column that maps each label value to its position (as a string index) through an
 * inline {@code "input"}/{@code "output"} table. The prediction column is then exposed
 * to the encoder as a categorical feature over those indices.
 *
 * <p>If the model implements {@code HasProbabilityCol}, one probability field per label
 * value, named {@code <probabilityCol>(<value>)}, is appended and the matching
 * continuous features are registered under the probability column.
 *
 * @param label the model label; cast to {@code CategoricalLabel}
 * @param encoder the encoder on which the prediction and probability features are registered
 * @return the output fields, in the order they were created
 */
@Override
public List<OutputField> registerOutputFields(Label label, SparkMLEncoder encoder) {
    T model = getTransformer();
    CategoricalLabel categoricalLabel = (CategoricalLabel) label;

    List<OutputField> outputFields = new ArrayList<>();

    String predictionCol = model.getPredictionCol();

    FieldName pmmlPredictionName = FieldName.create("pmml(" + predictionCol + ")");
    OutputField pmmlPredictedField = ModelUtil.createPredictedField(pmmlPredictionName, categoricalLabel.getDataType(), OpType.CATEGORICAL);
    outputFields.add(pmmlPredictedField);

    // Build the label value -> label index lookup table.
    List<String> indexCategories = new ArrayList<>();
    DocumentBuilder documentBuilder = DOMUtil.createDocumentBuilder();
    InlineTable lookupTable = new InlineTable();
    List<String> columns = Arrays.asList("input", "output");

    for (int position = 0; position < categoricalLabel.size(); position++) {
        String labelValue = categoricalLabel.getValue(position);
        String labelIndex = String.valueOf(position);
        indexCategories.add(labelIndex);

        lookupTable.addRows(DOMUtil.createRow(documentBuilder, columns, Arrays.asList(labelValue, labelIndex)));
    }

    FieldColumnPair inputPair = new FieldColumnPair(pmmlPredictedField.getName(), columns.get(0));
    MapValues lookup = new MapValues()
        .addFieldColumnPairs(inputPair)
        .setOutputColumn(columns.get(1))
        .setInlineTable(lookupTable);

    final OutputField predictedField = new OutputField(FieldName.create(predictionCol), DataType.DOUBLE)
        .setOpType(OpType.CATEGORICAL)
        .setResultFeature(ResultFeature.TRANSFORMED_VALUE)
        .setExpression(lookup);
    outputFields.add(predictedField);

    // The continuous view of the prediction feature reuses the same field name and data type.
    Feature predictionFeature = new CategoricalFeature(encoder, predictedField.getName(), predictedField.getDataType(), indexCategories) {
        @Override
        public ContinuousFeature toContinuousFeature() {
            PMMLEncoder pmmlEncoder = ensureEncoder();
            return new ContinuousFeature(pmmlEncoder, getName(), getDataType());
        }
    };
    encoder.putOnlyFeature(predictionCol, predictionFeature);

    // Models without a probability column contribute no further output fields.
    if (!(model instanceof HasProbabilityCol)) {
        return outputFields;
    }

    String probabilityCol = ((HasProbabilityCol) model).getProbabilityCol();

    List<Feature> probabilityFeatures = new ArrayList<>();
    for (int position = 0; position < categoricalLabel.size(); position++) {
        String labelValue = categoricalLabel.getValue(position);

        FieldName probabilityName = FieldName.create(probabilityCol + "(" + labelValue + ")");
        OutputField probabilityField = ModelUtil.createProbabilityField(probabilityName, DataType.DOUBLE, labelValue);
        outputFields.add(probabilityField);

        probabilityFeatures.add(new ContinuousFeature(encoder, probabilityField.getName(), probabilityField.getDataType()));
    }
    encoder.putFeatures(probabilityCol, probabilityFeatures);

    return outputFields;
}
Aggregations