Use of org.knime.core.data.DomainCreatorColumnSelection in the project knime-core by knime.
Shown below: the method recalcDomainOfLearningFields of the class LinReg2Learner.
/**
 * Recomputes the domain of the nominal learning and target columns of the given table, wraps the
 * table with the resulting spec, initializes the learner with that spec and reports nominal
 * learning columns that ended up without domain values.
 *
 * @param data the input table whose domain is recalculated
 * @param inPMMLSpec the incoming PMML spec, handed unchanged to {@code init}
 * @param exec used for the domain update and for creating the spec replacer table
 * @return {@code data} wrapped in a table that carries the recalculated spec
 * @throws InvalidSettingsException declared here; presumably raised by {@code init} - TODO confirm
 * @throws CanceledExecutionException declared here; presumably raised by the domain update when
 *             the execution is canceled - TODO confirm
 */
private BufferedDataTable recalcDomainOfLearningFields(final BufferedDataTable data, final PMMLPortObjectSpec inPMMLSpec, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    // First selection: every existing domain is dropped, and a new one is created only for
    // nominal columns that are used as learning or target field.
    final DomainCreatorColumnSelection nominalModelColumns = new DomainCreatorColumnSelection() {
        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            return true;
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            if (!colSpec.getType().isCompatible(NominalValue.class)) {
                return false;
            }
            final String columnName = colSpec.getName();
            return m_pmmlOutSpec.getLearningFields().contains(columnName)
                || m_pmmlOutSpec.getTargetFields().contains(columnName);
        }
    };

    // Second selection: neither drops nor creates anything
    // (presumably the min/max part of the domain - confirm against DataTableDomainCreator).
    final DomainCreatorColumnSelection keepAsIs = new DomainCreatorColumnSelection() {
        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            return false;
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            return false;
        }
    };

    final DataTableDomainCreator creator =
        new DataTableDomainCreator(data.getDataTableSpec(), nominalModelColumns, keepAsIs);
    creator.updateDomain(data, exec);
    final DataTableSpec updatedSpec = creator.createSpec();
    final BufferedDataTable result = exec.createSpecReplacerTable(data, updatedSpec);

    // Bug fix 5793 (similar to 5580 in LogReg2Learner): ignore columns with too many different
    // values. Because dropping the domain is part of the established behavior and cannot be
    // changed, even prepending a domain calculator to this node will not help when a column has
    // too many values.
    final Set<String> ignoredColumns = new LinkedHashSet<>();
    for (final String fieldName : m_pmmlOutSpec.getLearningFields()) {
        final DataColumnSpec fieldSpec = updatedSpec.getColumnSpec(fieldName);
        if (fieldSpec.getType().isCompatible(NominalValue.class)) {
            if (!fieldSpec.getDomain().hasValues()) {
                ignoredColumns.add(fieldName);
            }
        }
    }

    // Initialize the learner so that it works on the DataTableSpec of the re-wrapped input.
    init(result.getDataTableSpec(), inPMMLSpec, ignoredColumns);

    if (ignoredColumns.isEmpty()) {
        return result;
    }

    // No hint about using a domain calculator beforehand is appended to the message; as noted
    // above, that would not help.
    final boolean singleColumn = ignoredColumns.size() == 1;
    final String warning = (singleColumn ? "Column " : "Columns ")
        + ConvenienceMethods.getShortStringFrom(ignoredColumns, 5)
        + (singleColumn ? " has " : " have ")
        + "too many different values - will be ignored during training";
    LOGGER.warn(warning);

    // Append to an already collected warning message, separated by a line break.
    if (m_warningMessage == null) {
        m_warningMessage = warning;
    } else {
        m_warningMessage = m_warningMessage + "\n" + warning;
    }
    return result;
}
Aggregations