use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class CategoryToNumberApplyNodeModel method createRearranger.
/**
 * Builds the {@link ColumnRearranger} that applies the MapValues derived
 * fields of the given model to the matching string columns of the input.
 *
 * <p>Derived fields whose column is missing from the input, or is present
 * but not string compatible, are skipped with a logged warning.</p>
 *
 * @param spec the spec of the input table
 * @param model the PMML model providing the derived fields; {@code null}
 *            during configure, in which case no column is processed
 * @return a rearranger that appends or replaces (depending on the settings)
 *         one column per applicable derived field
 */
private ColumnRearranger createRearranger(final DataTableSpec spec, final PMMLPortObject model) {
    final boolean hasModel = null != model;

    // Partition the input column names by string compatibility.
    Set<String> stringCols = new LinkedHashSet<String>();
    Set<String> otherCols = new LinkedHashSet<String>();
    for (DataColumnSpec colSpec : spec) {
        Set<String> target = colSpec.getType().isCompatible(StringValue.class) ? stringCols : otherCols;
        target.add(colSpec.getName());
    }

    if (stringCols.isEmpty()) {
        // Without a model we are in configure, otherwise in execute.
        setWarningMessage(hasModel ? "No columns to process, returning input." : "No columns to process.");
    }

    // The map values of the model, keyed by column name; empty without a model.
    Map<String, DerivedField> mapValues;
    if (hasModel) {
        mapValues = getMapValues(model);
    } else {
        mapValues = Collections.<String, DerivedField>emptyMap();
    }

    ColumnRearranger rearranger = new ColumnRearranger(spec);
    for (Map.Entry<String, DerivedField> entry : mapValues.entrySet()) {
        final String col = entry.getKey();
        final DerivedField derivedField = entry.getValue();

        if (hasModel && !stringCols.contains(col)) {
            // The column cannot be processed. Name the output column the
            // same way for both warnings: display name if it is non-blank,
            // field name otherwise.
            String displayName = derivedField.getDisplayName();
            String outColumn = (null == displayName || displayName.trim().isEmpty())
                    ? derivedField.getName() : displayName;
            if (otherCols.contains(col)) {
                // The column exists in the input but has the wrong type.
                LOGGER.warn("Cannot create column \"" + outColumn + "\" since the input column \"" + col + "\" is not of type StringValue.");
            } else {
                // The column is not part of the input at all.
                LOGGER.warn("Cannot create column \"" + outColumn + "\" since the column \"" + col + "\" is not in the input.");
            }
            continue;
        }

        CategoryToNumberApplyCellFactory factory =
                new CategoryToNumberApplyCellFactory(spec, col, m_settings, derivedField.getMapValues());
        if (m_settings.getAppendColumns()) {
            rearranger.append(factory);
        } else {
            rearranger.replace(factory, col);
        }
    }
    return rearranger;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLMapValuesTranslator method createDerivedFields.
/**
 * Creates the derived field holding the MapValues element that describes
 * the configured mapping from the input column to the output column.
 *
 * @return an array containing the single derived field that was created
 */
private DerivedField[] createDerivedFields() {
    DerivedField df = DerivedField.Factory.newInstance();
    df.setExtensionArray(createSummaryExtension());
    /* The field name must be retrieved before creating a new derived
     * name for this derived field as the map only contains the
     * current mapping. */
    String fieldName = m_mapper.getDerivedFieldName(m_config.getInColumn());
    if (m_config.getInColumn().equals(m_config.getOutColumn())) {
        // The column is replaced: keep its name as display name and let
        // the mapper create the name of the derived field.
        String name = m_config.getInColumn();
        df.setDisplayName(name);
        df.setName(m_mapper.createDerivedFieldName(name));
    } else {
        df.setName(m_config.getOutColumn());
    }
    df.setOptype(m_config.getOpType());
    df.setDataType(m_config.getOutDataType());
    MapValues mapValues = df.addNewMapValues();
    // the element in the InlineTable representing the output column
    // Use dummy name instead of m_config.getOutColumn() since the
    // input column could contain characters that are not allowed in XML
    final QName xmlOut = new QName("http://www.dmg.org/PMML-4_0", "out");
    mapValues.setOutputColumn(xmlOut.getLocalPart());
    mapValues.setDataType(m_config.getOutDataType());
    if (!m_config.getDefaultValue().isMissing()) {
        mapValues.setDefaultValue(m_config.getDefaultValue().toString());
    }
    if (!m_config.getMapMissingTo().isMissing()) {
        mapValues.setMapMissingTo(m_config.getMapMissingTo().toString());
    }
    // the mapping of input field <-> element in the InlineTable
    FieldColumnPair fieldColPair = mapValues.addNewFieldColumnPair();
    fieldColPair.setField(fieldName);
    // Use dummy name instead of m_config.getInColumn() since the
    // input column could contain characters that are not allowed in XML
    final QName xmlIn = new QName("http://www.dmg.org/PMML-4_0", "in");
    fieldColPair.setColumn(xmlIn.getLocalPart());
    InlineTable table = mapValues.addNewInlineTable();
    for (Entry<DataCell, ? extends DataCell> entry : m_config.getEntries().entrySet()) {
        Row row = table.addNewRow();
        XmlCursor cursor = row.newCursor();
        try {
            // Step inside the row element, then write the in/out pair.
            cursor.toNextToken();
            cursor.insertElementWithText(xmlIn, entry.getKey().toString());
            cursor.insertElementWithText(xmlOut, entry.getValue().toString());
        } finally {
            // Release the cursor even if one of the insertions fails.
            cursor.dispose();
        }
    }
    return new DerivedField[] { df };
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class NumberToCategoryApplyNodeModel method getMapValues.
/**
 * Collects the derived fields of the model that carry a MapValues element.
 *
 * @param model the PMML model
 * @return the derived fields with a MapValues element, keyed by their
 *         display name or, if no display name is set, by their name
 *         (presumably the name of the column the mapping is based on)
 */
private Map<String, DerivedField> getMapValues(final PMMLPortObject model) {
    Map<String, DerivedField> result = new LinkedHashMap<String, DerivedField>();
    for (DerivedField field : model.getDerivedFields()) {
        if (null == field.getMapValues()) {
            // Not a MapValues transformation, nothing to register.
            continue;
        }
        // Prefer the display name, fall back to the field name.
        String displayName = field.getDisplayName();
        String key = displayName != null ? displayName : field.getName();
        result.put(key, field);
    }
    return result;
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class PMMLOne2ManyTranslator method createDerivedFields.
/**
 * Creates one derived field with a NormDiscrete element for every
 * (name, value) pair registered in the column mapping.
 *
 * @return the created derived fields, in the iteration order of the
 *         column mapping and of the pairs of each column
 */
private DerivedField[] createDerivedFields() {
    List<DerivedField> result = new ArrayList<DerivedField>();
    for (Map.Entry<String, List<Pair<String, String>>> mapping : m_columnMapping.entrySet()) {
        // The field all NormDiscrete elements of this column refer to.
        final String sourceField = m_mapper.getDerivedFieldName(mapping.getKey());
        for (Pair<String, String> nameAndValue : mapping.getValue()) {
            DerivedField field = DerivedField.Factory.newInstance();
            field.setName(nameAndValue.getFirst());
            field.setOptype(OPTYPE.ORDINAL);
            field.setDataType(DATATYPE.INTEGER);

            NormDiscrete indicator = field.addNewNormDiscrete();
            indicator.setField(sourceField);
            indicator.setValue(nameAndValue.getSecond());
            indicator.setMapMissingTo(0);

            result.add(field);
        }
    }
    return result.toArray(new DerivedField[result.size()]);
}
use of org.dmg.pmml.DerivedFieldDocument.DerivedField in project knime-core by knime.
the class MissingCellReplacingDataTable method prepareHandlers.
/**
 * Creates one missing cell handler per column of the input table.
 *
 * <p>If the PMML document has no transformation dictionary, or the
 * dictionary contains no derived fields, every column gets a handler from
 * the {@link DoNothingMissingCellHandlerFactory}. Otherwise each column's
 * handler is created from the derived field registered under the column's
 * name ({@code null} if there is none).</p>
 *
 * @param inTableSpec the spec of the input table
 * @param pmmlDoc the PMML document whose transformation dictionary is read
 * @return the handlers, indexed like the columns of {@code inTableSpec}
 * @throws InvalidSettingsException presumably if a handler cannot be
 *             created; propagated from the handler creation calls
 */
private MissingCellHandler[] prepareHandlers(final DataTableSpec inTableSpec, final PMMLDocument pmmlDoc) throws InvalidSettingsException {
    final int numColumns = inTableSpec.getNumColumns();
    MissingCellHandler[] handlers = new MissingCellHandler[numColumns];

    boolean hasDerivedFields = pmmlDoc.getPMML().getTransformationDictionary() != null
            && !pmmlDoc.getPMML().getTransformationDictionary().getDerivedFieldList().isEmpty();
    if (!hasDerivedFields) {
        // Nothing to apply: leave every column untouched.
        for (int col = 0; col < numColumns; col++) {
            handlers[col] = DoNothingMissingCellHandlerFactory.getInstance()
                    .createHandler(inTableSpec.getColumnSpec(col));
        }
        return handlers;
    }

    // Index the derived fields by the column name the mapper resolves for them.
    DerivedFieldMapper mapper = new DerivedFieldMapper(pmmlDoc);
    Map<String, DerivedField> fieldsByColumn = new HashMap<>();
    for (DerivedField field : pmmlDoc.getPMML().getTransformationDictionary().getDerivedFieldList()) {
        fieldsByColumn.put(mapper.getColumnName(field.getName()), field);
    }

    for (int col = 0; col < numColumns; col++) {
        DataColumnSpec colSpec = inTableSpec.getColumnSpec(col);
        // The lookup yields null for columns without a derived field.
        handlers[col] = createHandlerForColumn(colSpec, fieldsByColumn.get(colSpec.getName()));
    }
    return handlers;
}
Aggregations