Search in sources :

Example 1 with DataTableDomainCreator

use of org.knime.core.data.DataTableDomainCreator in project knime-core by knime.

the class LogRegLearner method recalcDomainForTargetAndLearningFields.

/**
 * Recomputes the domains of the target column and the learning fields of {@code data} and
 * initializes the learner with the resulting table spec.
 * <p>
 * The first column selection covers the target column and the nominal learning fields, the
 * second one drops and recreates the domain of the numeric learning fields. Nominal learning
 * fields whose recomputed domain reports no values are collected, reported in a warning and
 * handed to {@code init}.
 *
 * @param data the input table whose spec is recalculated
 * @param inPMMLSpec the incoming PMML spec, passed on to {@code init}
 * @param exec used for the domain update and for creating the spec replacer table
 * @return {@code data} wrapped with the recalculated spec
 * @throws InvalidSettingsException presumably raised by the settings check when the recomputed
 *             target domain has no values - confirm against {@code CheckUtils.checkSetting}
 * @throws CanceledExecutionException declared by this method; presumably raised when the domain
 *             update is canceled
 */
private BufferedDataTable recalcDomainForTargetAndLearningFields(final BufferedDataTable data, final PMMLPortObjectSpec inPMMLSpec, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    final String targetName = m_pmmlOutSpec.getTargetFields().get(0);

    // selection for the target column and the nominal learning fields; never drops a domain
    final DomainCreatorColumnSelection nominalSelection = new DomainCreatorColumnSelection() {

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            final String name = colSpec.getName();
            if (name.equals(targetName)) {
                return true;
            }
            if (!colSpec.getType().isCompatible(NominalValue.class)) {
                return false;
            }
            return m_pmmlOutSpec.getLearningFields().contains(name);
        }

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            return false;
        }
    };

    // selection for the numeric learning fields
    final DomainCreatorColumnSelection numericSelection = new DomainCreatorColumnSelection() {

        private boolean isNumericLearningField(final DataColumnSpec colSpec) {
            if (!colSpec.getType().isCompatible(DoubleValue.class)) {
                return false;
            }
            return m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            return isNumericLearningField(colSpec);
        }

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            // drop domain of numeric learning fields so that we can check for constant columns
            return isNumericLearningField(colSpec);
        }
    };

    final DataTableDomainCreator creator = new DataTableDomainCreator(data.getDataTableSpec(), nominalSelection, numericSelection);
    creator.updateDomain(data, exec);
    final DataTableSpec recalculatedSpec = creator.createSpec();

    final boolean targetHasValues = recalculatedSpec.getColumnSpec(targetName).getDomain().hasValues();
    CheckUtils.checkSetting(targetHasValues, "Target column '%s' has too many" + " unique values - consider to use domain calucator node before to enforce calculation", targetName);

    final BufferedDataTable result = exec.createSpecReplacerTable(data, recalculatedSpec);

    // bug fix 5580 - ignore columns with too many different values
    final Set<String> skippedColumns = new LinkedHashSet<>();
    for (final String field : m_pmmlOutSpec.getLearningFields()) {
        final DataColumnSpec fieldSpec = recalculatedSpec.getColumnSpec(field);
        if (!fieldSpec.getType().isCompatible(NominalValue.class)) {
            continue;
        }
        if (fieldSpec.getDomain().hasValues()) {
            continue;
        }
        skippedColumns.add(field);
    }

    if (!skippedColumns.isEmpty()) {
        final boolean single = skippedColumns.size() == 1;
        final String warning = (single ? "Column " : "Columns ")
            + ConvenienceMethods.getShortStringFrom(skippedColumns, 5)
            + (single ? " has " : " have ")
            + "too many different values - will be ignored during training "
            + "(enforce inclusion by using a domain calculator node before)";
        LOGGER.warn(warning);
        // append to an already collected warning, separated by a line break
        if (m_warningMessage == null) {
            m_warningMessage = warning;
        } else {
            m_warningMessage = m_warningMessage + "\n" + warning;
        }
    }

    // initialize m_learner so that it has the correct DataTableSpec of the input
    init(result.getDataTableSpec(), inPMMLSpec, skippedColumns);
    return result;
}
Also used : DataTableDomainCreator(org.knime.core.data.DataTableDomainCreator) LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) NominalValue(org.knime.core.data.NominalValue) DomainCreatorColumnSelection(org.knime.core.data.DomainCreatorColumnSelection) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 2 with DataTableDomainCreator

use of org.knime.core.data.DataTableDomainCreator in project knime-core by knime.

the class LogRegLearner method recalcDomainForTargetAndLearningFields.

/**
 * Recomputes the domains of the target column and the learning fields of {@code data} and
 * initializes the learner with the resulting table spec.
 * <p>
 * One column selection handles the target column together with the nominal learning fields,
 * the other drops and recreates the domain of the numeric learning fields. Nominal learning
 * fields whose recomputed domain reports no values are gathered, mentioned in a warning and
 * passed to {@code init}.
 *
 * @param data the input table whose spec is recalculated
 * @param exec used for the domain update and for creating the spec replacer table
 * @return {@code data} wrapped with the recalculated spec
 * @throws InvalidSettingsException presumably raised by the settings check when the recomputed
 *             target domain has no values - confirm against {@code CheckUtils.checkSetting}
 * @throws CanceledExecutionException declared by this method; presumably raised when the domain
 *             update is canceled
 */
private BufferedDataTable recalcDomainForTargetAndLearningFields(final BufferedDataTable data, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    final String target = m_pmmlOutSpec.getTargetFields().get(0);

    // target column and nominal learning fields: create a domain, never drop one
    final DomainCreatorColumnSelection targetAndNominalFields = new DomainCreatorColumnSelection() {

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            if (colSpec.getName().equals(target)) {
                return true;
            }
            final boolean nominal = colSpec.getType().isCompatible(NominalValue.class);
            return nominal && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            return false;
        }
    };

    // numeric learning fields: drop the existing domain and create a new one
    final DomainCreatorColumnSelection numericFields = new DomainCreatorColumnSelection() {

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            final boolean numeric = colSpec.getType().isCompatible(DoubleValue.class);
            return numeric && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            // drop domain of numeric learning fields so that we can check for constant columns
            final boolean numeric = colSpec.getType().isCompatible(DoubleValue.class);
            return numeric && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }
    };

    final DataTableDomainCreator domains = new DataTableDomainCreator(data.getDataTableSpec(), targetAndNominalFields, numericFields);
    domains.updateDomain(data, exec);
    final DataTableSpec updatedSpec = domains.createSpec();

    final DataColumnSpec targetSpec = updatedSpec.getColumnSpec(target);
    CheckUtils.checkSetting(targetSpec.getDomain().hasValues(), "Target column '%s' has too many" + " unique values - consider to use domain calucator node before to enforce calculation", target);

    final BufferedDataTable respecced = exec.createSpecReplacerTable(data, updatedSpec);

    // bug fix 5580 - ignore columns with too many different values
    final Set<String> ignored = new LinkedHashSet<>();
    for (final String name : m_pmmlOutSpec.getLearningFields()) {
        final DataColumnSpec learningSpec = updatedSpec.getColumnSpec(name);
        final boolean nominal = learningSpec.getType().isCompatible(NominalValue.class);
        if (nominal && !learningSpec.getDomain().hasValues()) {
            ignored.add(name);
        }
    }

    if (!ignored.isEmpty()) {
        final boolean onlyOne = ignored.size() == 1;
        final String message = new StringBuilder()
            .append(onlyOne ? "Column " : "Columns ")
            .append(ConvenienceMethods.getShortStringFrom(ignored, 5))
            .append(onlyOne ? " has " : " have ")
            .append("too many different values - will be ignored during training ")
            .append("(enforce inclusion by using a domain calculator node before)")
            .toString();
        LOGGER.warn(message);
        // keep any earlier warning and add this one on a new line
        final String prefix = m_warningMessage == null ? "" : m_warningMessage + "\n";
        m_warningMessage = prefix + message;
    }

    // initialize m_learner so that it has the correct DataTableSpec of the input
    init(respecced.getDataTableSpec(), ignored);
    return respecced;
}
Also used : DataTableDomainCreator(org.knime.core.data.DataTableDomainCreator) LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) NominalValue(org.knime.core.data.NominalValue) DomainCreatorColumnSelection(org.knime.core.data.DomainCreatorColumnSelection) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 3 with DataTableDomainCreator

use of org.knime.core.data.DataTableDomainCreator in project knime-core by knime.

the class DefaultDataArray method init.

/**
 * Buffers up to {@code numOfRows} rows of {@code dTable}, beginning with the 1-based row
 * {@code firstRow}, in {@code m_rows} and stores a table spec whose domain was updated with
 * exactly the buffered rows in {@code m_tSpec}.
 * <p>
 * The row iterator is closed in all cases (if it is a {@link CloseableRowIterator}), including
 * when reading is aborted by a cancellation or another exception.
 *
 * @param dTable the table to read from, must not be {@code null}
 * @param firstRow 1-based number of the first row to buffer, must be at least 1
 * @param numOfRows maximum number of rows to buffer, must not be negative
 * @param execMon optional monitor for cancellation checks and progress reporting, may be
 *            {@code null}
 * @throws IllegalArgumentException if {@code dTable} is {@code null}, {@code firstRow} is
 *             smaller than 1 or {@code numOfRows} is negative
 * @throws CanceledExecutionException if {@code execMon} reports a cancellation while reading
 */
private void init(final DataTable dTable, final int firstRow, final int numOfRows, final ExecutionMonitor execMon) throws CanceledExecutionException {
    if (dTable == null) {
        throw new IllegalArgumentException("Must provide non-null data table" + " for DataArray");
    }
    if (firstRow < 1) {
        throw new IllegalArgumentException("Starting row must be greater" + " than zero");
    }
    if (numOfRows < 0) {
        throw new IllegalArgumentException("Number of rows to read must be" + " greater than or equal zero");
    }
    DataTableSpec tSpec = dTable.getDataTableSpec();
    DataTableDomainCreator domainCreator = new DataTableDomainCreator(tSpec, true);
    int numOfColumns = tSpec.getNumColumns();
    m_firstRow = firstRow;
    // NOTE(review): the capacity hint is the column count, not a row count; this only affects
    // the initial allocation of the list - confirm whether a row-based hint was intended
    m_rows = new ArrayList<DataRow>(numOfColumns);
    // now fill our data structures
    RowIterator rIter = dTable.iterator();
    try {
        int rowNumber = 0;
        while ((rIter.hasNext()) && (m_rows.size() < numOfRows)) {
            // get the next row
            DataRow row = rIter.next();
            rowNumber++;
            if (rowNumber < firstRow) {
                // skip all rows until we see the specified first row
                continue;
            }
            // store it.
            m_rows.add(row);
            domainCreator.updateDomain(row);
            // see if user wants us to stop
            if (execMon != null) {
                // will throw an exception if we are supposed to cancel
                execMon.checkCanceled();
                execMon.setProgress((double) m_rows.size() / (double) numOfRows, "read row " + m_rows.size() + " of max. " + numOfRows);
            }
        }
    } finally {
        // release the iterator even if reading was canceled or failed; otherwise the
        // resources behind a closeable iterator would stay open
        if (rIter instanceof CloseableRowIterator) {
            ((CloseableRowIterator) rIter).close();
        }
    }
    m_tSpec = domainCreator.createSpec();
}
Also used : DataTableDomainCreator(org.knime.core.data.DataTableDomainCreator) DataTableSpec(org.knime.core.data.DataTableSpec) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) RowIterator(org.knime.core.data.RowIterator) DefaultRowIterator(org.knime.core.data.def.DefaultRowIterator) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) DataRow(org.knime.core.data.DataRow)

Example 4 with DataTableDomainCreator

use of org.knime.core.data.DataTableDomainCreator in project knime-core by knime.

the class DomainNodeModel method getDomainCreator.

/**
 * Builds the domain creator for {@code inputSpec} from the node's current configuration.
 * <p>
 * The columns included by {@code m_possValConfig} and {@code m_minMaxConfig} get their
 * respective domain part created. A domain part is dropped for every included column and, unless
 * the matching "retain unselected" flag is set, for all other columns as well. A negative
 * {@code m_maxPossValues} is translated to {@link Integer#MAX_VALUE}.
 *
 * @param inputSpec the spec the filter configurations are applied to
 * @return the configured domain creator
 */
private DataTableDomainCreator getDomainCreator(final DataTableSpec inputSpec) {
    final Set<String> possibleValueColumns =
        new HashSet<String>(Arrays.asList(m_possValConfig.applyTo(inputSpec).getIncludes()));

    final int possibleValueLimit;
    if (m_maxPossValues >= 0) {
        possibleValueLimit = m_maxPossValues;
    } else {
        possibleValueLimit = Integer.MAX_VALUE;
    }

    final Set<String> boundsColumns =
        new HashSet<String>(Arrays.asList(m_minMaxConfig.applyTo(inputSpec).getIncludes()));

    final DomainCreatorColumnSelection possibleValues = new DomainCreatorColumnSelection() {

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            return possibleValueColumns.contains(colSpec.getName());
        }

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            if (possibleValueColumns.contains(colSpec.getName())) {
                return true;
            }
            return !m_possValRetainUnselected;
        }
    };

    final DomainCreatorColumnSelection bounds = new DomainCreatorColumnSelection() {

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            return boundsColumns.contains(colSpec.getName());
        }

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            if (boundsColumns.contains(colSpec.getName())) {
                return true;
            }
            return !m_minMaxRetainUnselected;
        }
    };

    final DataTableDomainCreator creator = new DataTableDomainCreator(inputSpec, possibleValues, bounds);
    creator.setMaxPossibleValues(possibleValueLimit);
    return creator;
}
Also used : DataTableDomainCreator(org.knime.core.data.DataTableDomainCreator) DataColumnSpec(org.knime.core.data.DataColumnSpec) DomainCreatorColumnSelection(org.knime.core.data.DomainCreatorColumnSelection) HashSet(java.util.HashSet)

Example 5 with DataTableDomainCreator

use of org.knime.core.data.DataTableDomainCreator in project knime-core by knime.

the class DomainNodeModel method execute.

/**
 * Updates the domain of the first input table with the configured domain creator and returns
 * that table with the resulting spec as the single output.
 * <p>
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    final DataTableDomainCreator creator = getDomainCreator(input.getDataTableSpec());
    // the row count is passed along with the table and the execution context
    creator.updateDomain(input, exec, input.size());
    final BufferedDataTable output = exec.createSpecReplacerTable(input, creator.createSpec());
    return new BufferedDataTable[] {output};
}
Also used : DataTableDomainCreator(org.knime.core.data.DataTableDomainCreator) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Aggregations

DataTableDomainCreator (org.knime.core.data.DataTableDomainCreator)10 DataColumnSpec (org.knime.core.data.DataColumnSpec)6 DataTableSpec (org.knime.core.data.DataTableSpec)6 DomainCreatorColumnSelection (org.knime.core.data.DomainCreatorColumnSelection)6 BufferedDataTable (org.knime.core.node.BufferedDataTable)6 NominalValue (org.knime.core.data.NominalValue)5 LinkedHashSet (java.util.LinkedHashSet)4 HashSet (java.util.HashSet)2 BoundedValue (org.knime.core.data.BoundedValue)1 DataRow (org.knime.core.data.DataRow)1 RowIterator (org.knime.core.data.RowIterator)1 CloseableRowIterator (org.knime.core.data.container.CloseableRowIterator)1 DefaultRowIterator (org.knime.core.data.def.DefaultRowIterator)1 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)1 FilterResult (org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)1