use of org.knime.core.data.DataTableDomainCreator in project knime-core by knime.
the class LogRegLearner method recalcDomainForTargetAndLearningFields.
/**
 * Recalculates the domain information needed for training and (re-)initializes the learner with it.
 * <p>
 * Possible values are computed for the target column and for all nominal learning fields; min/max
 * bounds are recomputed for numeric learning fields after their previous domain has been dropped.
 * Nominal learning fields that end up without possible values are reported in a warning and handed
 * to {@code init} as columns to be ignored.
 *
 * @param data the input table whose domain is recalculated
 * @param inPMMLSpec the incoming PMML spec, forwarded unchanged to {@code init}
 * @param exec execution context used for the domain update and for creating the result table
 * @return the input table wrapped with the freshly computed table spec
 * @throws InvalidSettingsException presumably raised by the settings check when the target column
 *             has no possible values after the recalculation
 * @throws CanceledExecutionException declared for the domain update; presumably raised on cancel
 */
private BufferedDataTable recalcDomainForTargetAndLearningFields(final BufferedDataTable data, final PMMLPortObjectSpec inPMMLSpec, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    final String targetCol = m_pmmlOutSpec.getTargetFields().get(0);

    // possible values: never discard anything, compute for the target and nominal learning fields
    final DomainCreatorColumnSelection possibleValueSelection = new DomainCreatorColumnSelection() {
        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            if (colSpec.getName().equals(targetCol)) {
                return true;
            }
            return colSpec.getType().isCompatible(NominalValue.class)
                && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            return false;
        }
    };

    // min/max: applies to numeric learning fields only
    final DomainCreatorColumnSelection minMaxSelection = new DomainCreatorColumnSelection() {
        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            return colSpec.getType().isCompatible(DoubleValue.class)
                && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            // drop domain of numeric learning fields so that we can check for constant columns;
            // the affected columns are exactly those for which a new domain is created
            return createDomain(colSpec);
        }
    };

    final DataTableDomainCreator creator =
        new DataTableDomainCreator(data.getDataTableSpec(), possibleValueSelection, minMaxSelection);
    creator.updateDomain(data, exec);
    final DataTableSpec spec = creator.createSpec();

    final boolean targetHasValues = spec.getColumnSpec(targetCol).getDomain().hasValues();
    CheckUtils.checkSetting(targetHasValues, "Target column '%s' has too many" + " unique values - consider to use domain calucator node before to enforce calculation", targetCol);

    final BufferedDataTable newDataTable = exec.createSpecReplacerTable(data, spec);

    // bug fix 5580 - ignore columns with too many different values
    final Set<String> ignoredColumns = new LinkedHashSet<>();
    for (final String fieldName : m_pmmlOutSpec.getLearningFields()) {
        final DataColumnSpec fieldSpec = spec.getColumnSpec(fieldName);
        final boolean isNominal = fieldSpec.getType().isCompatible(NominalValue.class);
        if (isNominal && !fieldSpec.getDomain().hasValues()) {
            ignoredColumns.add(fieldName);
        }
    }

    if (!ignoredColumns.isEmpty()) {
        final boolean single = ignoredColumns.size() == 1;
        final String warningText = new StringBuilder()
            .append(single ? "Column " : "Columns ")
            .append(ConvenienceMethods.getShortStringFrom(ignoredColumns, 5))
            .append(single ? " has " : " have ")
            .append("too many different values - will be ignored during training ")
            .append("(enforce inclusion by using a domain calculator node before)")
            .toString();
        LOGGER.warn(warningText);
        // append to an already existing warning instead of overwriting it
        if (m_warningMessage == null) {
            m_warningMessage = warningText;
        } else {
            m_warningMessage = m_warningMessage + "\n" + warningText;
        }
    }

    // initialize m_learner so that it has the correct DataTableSpec of the input
    init(newDataTable.getDataTableSpec(), inPMMLSpec, ignoredColumns);
    return newDataTable;
}
use of org.knime.core.data.DataTableDomainCreator in project knime-core by knime.
the class LogRegLearner method recalcDomainForTargetAndLearningFields.
/**
 * Recalculates the domain information needed for training and (re-)initializes the learner with it.
 * <p>
 * Possible values are computed for the target column and for all nominal learning fields; min/max
 * bounds are recomputed for numeric learning fields after their previous domain has been dropped.
 * Nominal learning fields that end up without possible values are reported in a warning and handed
 * to {@code init} as columns to be ignored.
 *
 * @param data the input table whose domain is recalculated
 * @param exec execution context used for the domain update and for creating the result table
 * @return the input table wrapped with the freshly computed table spec
 * @throws InvalidSettingsException presumably raised by the settings check when the target column
 *             has no possible values after the recalculation
 * @throws CanceledExecutionException declared for the domain update; presumably raised on cancel
 */
private BufferedDataTable recalcDomainForTargetAndLearningFields(final BufferedDataTable data, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    final String targetCol = m_pmmlOutSpec.getTargetFields().get(0);

    // possible values: never discard anything, compute for the target and nominal learning fields
    final DomainCreatorColumnSelection possibleValueSelection = new DomainCreatorColumnSelection() {
        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            if (colSpec.getName().equals(targetCol)) {
                return true;
            }
            return colSpec.getType().isCompatible(NominalValue.class)
                && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            return false;
        }
    };

    // min/max: applies to numeric learning fields only
    final DomainCreatorColumnSelection minMaxSelection = new DomainCreatorColumnSelection() {
        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            return colSpec.getType().isCompatible(DoubleValue.class)
                && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            // drop domain of numeric learning fields so that we can check for constant columns;
            // the affected columns are exactly those for which a new domain is created
            return createDomain(colSpec);
        }
    };

    final DataTableDomainCreator creator =
        new DataTableDomainCreator(data.getDataTableSpec(), possibleValueSelection, minMaxSelection);
    creator.updateDomain(data, exec);
    final DataTableSpec spec = creator.createSpec();

    final boolean targetHasValues = spec.getColumnSpec(targetCol).getDomain().hasValues();
    CheckUtils.checkSetting(targetHasValues, "Target column '%s' has too many" + " unique values - consider to use domain calucator node before to enforce calculation", targetCol);

    final BufferedDataTable newDataTable = exec.createSpecReplacerTable(data, spec);

    // bug fix 5580 - ignore columns with too many different values
    final Set<String> ignoredColumns = new LinkedHashSet<>();
    for (final String fieldName : m_pmmlOutSpec.getLearningFields()) {
        final DataColumnSpec fieldSpec = spec.getColumnSpec(fieldName);
        final boolean isNominal = fieldSpec.getType().isCompatible(NominalValue.class);
        if (isNominal && !fieldSpec.getDomain().hasValues()) {
            ignoredColumns.add(fieldName);
        }
    }

    if (!ignoredColumns.isEmpty()) {
        final boolean single = ignoredColumns.size() == 1;
        final String warningText = new StringBuilder()
            .append(single ? "Column " : "Columns ")
            .append(ConvenienceMethods.getShortStringFrom(ignoredColumns, 5))
            .append(single ? " has " : " have ")
            .append("too many different values - will be ignored during training ")
            .append("(enforce inclusion by using a domain calculator node before)")
            .toString();
        LOGGER.warn(warningText);
        // append to an already existing warning instead of overwriting it
        if (m_warningMessage == null) {
            m_warningMessage = warningText;
        } else {
            m_warningMessage = m_warningMessage + "\n" + warningText;
        }
    }

    // initialize m_learner so that it has the correct DataTableSpec of the input
    init(newDataTable.getDataTableSpec(), ignoredColumns);
    return newDataTable;
}
use of org.knime.core.data.DataTableDomainCreator in project knime-core by knime.
the class DefaultDataArray method init.
/**
 * Reads a window of rows from the given table into {@code m_rows} and computes the table spec
 * (including domain information gathered from the stored rows) into {@code m_tSpec}.
 * <p>
 * Rows are counted starting at 1; rows before {@code firstRow} are skipped, and reading stops once
 * {@code numOfRows} rows have been stored or the table is exhausted. The row iterator is closed
 * (if it is closeable) on every exit path, including cancellation.
 *
 * @param dTable the table to read from, must not be {@code null}
 * @param firstRow 1-based index of the first row to store, must be at least 1
 * @param numOfRows maximum number of rows to store, must not be negative
 * @param execMon optional monitor for progress reporting and cancellation, may be {@code null}
 * @throws IllegalArgumentException if one of the arguments violates the constraints above
 * @throws CanceledExecutionException if the monitor signals that the user canceled the operation
 */
private void init(final DataTable dTable, final int firstRow, final int numOfRows, final ExecutionMonitor execMon) throws CanceledExecutionException {
    if (dTable == null) {
        throw new IllegalArgumentException("Must provide non-null data table" + " for DataArray");
    }
    if (firstRow < 1) {
        throw new IllegalArgumentException("Starting row must be greater" + " than zero");
    }
    if (numOfRows < 0) {
        throw new IllegalArgumentException("Number of rows to read must be" + " greater than or equal zero");
    }
    DataTableSpec tSpec = dTable.getDataTableSpec();
    // NOTE(review): the boolean flag presumably asks the creator to start from the domain already
    // present in tSpec - confirm against the DataTableDomainCreator documentation
    DataTableDomainCreator domainCreator = new DataTableDomainCreator(tSpec, true);
    m_firstRow = firstRow;
    // default capacity: the number of rows actually stored is not known up front (numOfRows is
    // only an upper bound) and the column count is unrelated to it
    m_rows = new ArrayList<DataRow>();
    // now fill our data structures
    RowIterator rIter = dTable.iterator();
    try {
        int rowNumber = 0;
        while ((rIter.hasNext()) && (m_rows.size() < numOfRows)) {
            // get the next row
            DataRow row = rIter.next();
            rowNumber++;
            if (rowNumber < firstRow) {
                // skip all rows until we see the specified first row
                continue;
            }
            // store it.
            m_rows.add(row);
            domainCreator.updateDomain(row);
            // see if user wants us to stop
            if (execMon != null) {
                // will throw an exception if we are supposed to cancel
                execMon.checkCanceled();
                execMon.setProgress((double) m_rows.size() / (double) numOfRows, "read row " + m_rows.size() + " of max. " + numOfRows);
            }
        }
    } finally {
        // release the iterator's resources even if reading was canceled or failed
        if (rIter instanceof CloseableRowIterator) {
            ((CloseableRowIterator) rIter).close();
        }
    }
    m_tSpec = domainCreator.createSpec();
}
use of org.knime.core.data.DataTableDomainCreator in project knime-core by knime.
the class DomainNodeModel method getDomainCreator.
/**
 * Builds a domain creator for the given input spec based on the node's current configuration.
 * <p>
 * Possible values and min/max bounds are created for the columns included by the respective
 * column filter configuration. The existing domain part of a column is dropped if the column is
 * included or if unselected columns are configured not to retain their domain.
 *
 * @param inputSpec the spec of the input table
 * @return the configured domain creator, with the maximum number of possible values applied
 */
private DataTableDomainCreator getDomainCreator(final DataTableSpec inputSpec) {
    final Set<String> possValCols =
        new HashSet<String>(Arrays.asList(m_possValConfig.applyTo(inputSpec).getIncludes()));

    // a negative setting means "no limit"
    final int valueLimit;
    if (m_maxPossValues < 0) {
        valueLimit = Integer.MAX_VALUE;
    } else {
        valueLimit = m_maxPossValues;
    }

    final Set<String> minMaxCols =
        new HashSet<String>(Arrays.asList(m_minMaxConfig.applyTo(inputSpec).getIncludes()));

    final DomainCreatorColumnSelection possValueSelection = new DomainCreatorColumnSelection() {
        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            if (possValCols.contains(colSpec.getName())) {
                return true;
            }
            return !m_possValRetainUnselected;
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            return possValCols.contains(colSpec.getName());
        }
    };

    final DomainCreatorColumnSelection minMaxSelection = new DomainCreatorColumnSelection() {
        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            if (minMaxCols.contains(colSpec.getName())) {
                return true;
            }
            return !m_minMaxRetainUnselected;
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            return minMaxCols.contains(colSpec.getName());
        }
    };

    final DataTableDomainCreator creator =
        new DataTableDomainCreator(inputSpec, possValueSelection, minMaxSelection);
    creator.setMaxPossibleValues(valueLimit);
    return creator;
}
use of org.knime.core.data.DataTableDomainCreator in project knime-core by knime.
the class DomainNodeModel method execute.
/**
 * {@inheritDoc}
 * <p>
 * Updates the domain of the first input table using the configured domain creator and returns
 * that table with its spec replaced by the newly created one.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = inData[0];
    final DataTableDomainCreator creator = getDomainCreator(inputTable.getDataTableSpec());
    // the row count is passed along with the table and the execution context
    creator.updateDomain(inputTable, exec, inputTable.size());
    final BufferedDataTable result = exec.createSpecReplacerTable(inputTable, creator.createSpec());
    return new BufferedDataTable[] {result};
}
Aggregations