Search in sources :

Example 66 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class LogRegLearner method execute.

/**
 * Trains the logistic regression model on the table found at the first input port.
 *
 * <p>The steps are: initialize the learner with the input spec, recalculate the domain of the target
 * and learning columns, hand the resulting table to {@code checkConstantLearningFields} and finally
 * let the learner run on that table.</p>
 *
 * @param portObjects The input objects; element 0 is cast to {@link BufferedDataTable}.
 * @param exec the execution context, used for status messages and to derive sub-contexts
 * @return a {@link LogisticRegressionContent} storing computed data
 * @throws Exception if computation of the logistic regression model is not successful or if given data is
 *             inconsistent with the settings defined in the constructor.
 * @see LogRegLearnerNodeModel#execute(PortObject[], ExecutionContext)
 */
public LogisticRegressionContent execute(final PortObject[] portObjects, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = (BufferedDataTable) portObjects[0];
    init(inputTable.getDataTableSpec(), Collections.<String>emptySet());

    // Progress budget: the learner typically needs five steps with two passes over the data each,
    // and the domain calculation contributes one additional pass.
    final double totalPasses = 5.0 * 2.0 + 1.0;
    final double domainShare = 1.0 / totalPasses;

    exec.setMessage("Analyzing categorical data");
    final ExecutionContext domainExec = exec.createSubExecutionContext(domainShare);
    final BufferedDataTable tableWithDomain = recalcDomainForTargetAndLearningFields(inputTable, domainExec);
    checkConstantLearningFields(tableWithDomain);

    exec.setMessage("Building logistic regression model");
    final ExecutionContext learnExec = exec.createSubExecutionContext(1.0 - domainShare);
    return m_learner.perform(tableWithDomain, learnExec);
}
Also used : BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 67 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class LogRegLearner method recalcDomainForTargetAndLearningFields.

/**
 * Recalculates the domain information of the target column and the learning fields and returns the
 * input table with the updated spec attached.
 *
 * <p>Nominal learning fields that end up without domain values are collected, reported through the
 * logger and {@code m_warningMessage}, and passed to {@code init} together with the new spec.</p>
 *
 * @param data the input table whose domain is updated
 * @param exec execution context used for the domain calculation and for creating the spec-replaced table
 * @return a table with the content of {@code data} and the recalculated spec
 * @throws InvalidSettingsException if the target column has no domain values after the recalculation
 * @throws CanceledExecutionException declared for the domain update; presumably raised on user
 *             cancelation (confirm against {@code DataTableDomainCreator#updateDomain})
 */
private BufferedDataTable recalcDomainForTargetAndLearningFields(final BufferedDataTable data, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    final String targetCol = m_pmmlOutSpec.getTargetFields().get(0);
    // NOTE(review): the first selection presumably controls the possible-values domain and the second the
    // min/max domain - confirm against the DataTableDomainCreator constructor documentation.
    DataTableDomainCreator domainCreator = new DataTableDomainCreator(data.getDataTableSpec(), new DomainCreatorColumnSelection() {

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            return false;
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            // the target column always, nominal columns only when they are learning fields
            return colSpec.getName().equals(targetCol) || (colSpec.getType().isCompatible(NominalValue.class) && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName()));
        }
    }, new DomainCreatorColumnSelection() {

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            // drop domain of numeric learning fields so that we can check for constant columns
            return colSpec.getType().isCompatible(DoubleValue.class) && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            // same columns as in dropDomain: the dropped domain is recreated from the data
            return colSpec.getType().isCompatible(DoubleValue.class) && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }
    });
    domainCreator.updateDomain(data, exec);
    DataTableSpec spec = domainCreator.createSpec();
    // the target column must have domain values; otherwise the settings are rejected
    CheckUtils.checkSetting(spec.getColumnSpec(targetCol).getDomain().hasValues(), "Target column '%s' has too many unique values - consider to use domain calculator node before to enforce calculation", targetCol);
    BufferedDataTable newDataTable = exec.createSpecReplacerTable(data, spec);
    // bug fix 5580 - ignore columns with too many different values
    Set<String> columnWithTooManyDomainValues = new LinkedHashSet<>();
    for (String learningField : m_pmmlOutSpec.getLearningFields()) {
        DataColumnSpec columnSpec = spec.getColumnSpec(learningField);
        if (columnSpec.getType().isCompatible(NominalValue.class) && !columnSpec.getDomain().hasValues()) {
            columnWithTooManyDomainValues.add(learningField);
        }
    }
    if (!columnWithTooManyDomainValues.isEmpty()) {
        // singular/plural wording depends on the number of affected columns
        StringBuilder warning = new StringBuilder();
        warning.append(columnWithTooManyDomainValues.size() == 1 ? "Column " : "Columns ");
        warning.append(ConvenienceMethods.getShortStringFrom(columnWithTooManyDomainValues, 5));
        warning.append(columnWithTooManyDomainValues.size() == 1 ? " has " : " have ");
        warning.append("too many different values - will be ignored during training ");
        warning.append("(enforce inclusion by using a domain calculator node before)");
        LOGGER.warn(warning.toString());
        // append to an already existing warning message, separated by a line break
        m_warningMessage = (m_warningMessage == null ? "" : m_warningMessage + "\n") + warning.toString();
    }
    // initialize m_learner so that it has the correct DataTableSpec of the input
    init(newDataTable.getDataTableSpec(), columnWithTooManyDomainValues);
    return newDataTable;
}
Also used : DataTableDomainCreator(org.knime.core.data.DataTableDomainCreator) LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) NominalValue(org.knime.core.data.NominalValue) DomainCreatorColumnSelection(org.knime.core.data.DomainCreatorColumnSelection) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 68 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class RegressionPredictorNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Expects a {@link PMMLPortObject} at port 0 and a {@link BufferedDataTable} at port 1. The single
 * output is the input table run through the column rearranger built by {@code createRearranger}.</p>
 */
@Override
public PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final PMMLPortObject pmmlModel = (PMMLPortObject) inData[0];
    final List<Node> regressionModels = pmmlModel.getPMMLValue().getModels(PMMLModelType.RegressionModel);
    if (regressionModels.isEmpty()) {
        // without a regression model in the PMML document there is nothing to apply
        final String errorText = "No Regression Model found.";
        LOGGER.error(errorText);
        throw new RuntimeException(errorText);
    }

    // let the port object fill the translator with the regression model content
    final PMMLRegressionTranslator translator = new PMMLRegressionTranslator();
    pmmlModel.initializeModelTranslator(translator);

    final BufferedDataTable inputTable = (BufferedDataTable) inData[1];
    final DataTableSpec inputSpec = inputTable.getDataTableSpec();
    final ColumnRearranger rearranger = createRearranger(inputSpec, pmmlModel.getSpec(), translator);
    final BufferedDataTable predicted = exec.createColumnRearrangeTable(inputTable, rearranger, exec);
    return new BufferedDataTable[] { predicted };
}
Also used : PMMLRegressionTranslator(org.knime.base.node.mine.regression.PMMLRegressionTranslator) DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) Node(org.w3c.dom.Node) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 69 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class MissingValueHandling3Table method createMissingValueHandlingTable.

/**
 * Legacy {@link StringBuffer} variant of the missing value handling: delegates to the
 * {@link StringBuilder} based overload and copies any warnings it produced into the given buffer.
 *
 * @param table the table to do missing value handling on
 * @param colSettings the settings
 * @param exec for progress/cancel and to create the buffered data table
 * @param warningBuffer receives the warning text collected by the delegate once it has returned
 * @return the table returned by the delegate overload
 * @throws CanceledExecutionException if canceled
 * @since 2.8
 * @deprecated use {@link #createMissingValueHandlingTable(BufferedDataTable, MissingValueHandling2ColSetting[],
 *              ExecutionContext, StringBuilder)} instead
 */
@Deprecated
public static BufferedDataTable createMissingValueHandlingTable(final BufferedDataTable table, final MissingValueHandling2ColSetting[] colSettings, final ExecutionContext exec, final StringBuffer warningBuffer) throws CanceledExecutionException {
    // the replacement overload works on a StringBuilder, so warnings are gathered there first
    final StringBuilder collectedWarnings = new StringBuilder();
    final BufferedDataTable result = createMissingValueHandlingTable(table, colSettings, exec, collectedWarnings);
    // forward whatever the delegate reported to the caller's buffer
    warningBuffer.append(collectedWarnings);
    return result;
}
Also used : BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 70 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class PortObjectRepository method copy.

/**
 * Copies the argument object. A {@link BufferedDataTable} is copied cell by cell into a new container
 * (blob cells are cloned via {@code cloneBlobCell}); every other port object is copied through
 * {@code Node.copyPortObject}.
 *
 * <p>If cloning a blob cell fails, the original cell is put into the copy instead; the first such
 * failure is logged as a warning, later ones are suppressed.</p>
 *
 * @param object The port object to be copied.
 * @param exec Host for BDTs being created
 * @param progress For progress/cancelation; updated and checked once per copied row
 * @return The deep copy.
 * @throws IOException In case of exceptions while accessing the streams
 * @throws CanceledExecutionException If canceled.
 */
public static final PortObject copy(final PortObject object, final ExecutionContext exec, final ExecutionMonitor progress) throws IOException, CanceledExecutionException {
    if (object instanceof BufferedDataTable) {
        // need to copy the table cell by cell
        // this is to workaround the standard knime philosophy according
        // to which tables are referenced. A row-based copy will not work
        // as it still will reference blobs
        BufferedDataTable in = (BufferedDataTable) object;
        // NOTE(review): the arguments "true, 0" are presumably initDomain and maxCellsInMemory - confirm
        // against ExecutionContext#createDataContainer
        BufferedDataContainer con = exec.createDataContainer(in.getSpec(), true, 0);
        final long rowCount = in.size();
        long row = 0;
        boolean hasLoggedCloneProblem = false;
        for (DataRow r : in) {
            DataCell[] cells = new DataCell[r.getNumCells()];
            for (int i = 0; i < cells.length; i++) {
                // deserialize blob
                DataCell c = r.getCell(i);
                if (c instanceof BlobDataCell) {
                    try {
                        c = cloneBlobCell(c);
                    } catch (Exception e) {
                        // best effort: keep the original cell and report the problem only once
                        if (!hasLoggedCloneProblem) {
                            LOGGER.warn("Can't clone blob object: " + e.getMessage(), e);
                            hasLoggedCloneProblem = true;
                            LOGGER.debug("Suppressing further warnings.");
                        }
                    }
                }
                cells[i] = c;
            }
            con.addRowToTable(new DefaultRow(r.getKey(), cells));
            // count the row before reporting so the message starts at "1/N" and the progress
            // reaches 1.0 with the last row
            row++;
            progress.setProgress(row / (double) rowCount, "Copied row " + row + "/" + rowCount);
            progress.checkCanceled();
        }
        con.close();
        return con.getTable();
    }
    return Node.copyPortObject(object, exec);
}
Also used : BlobDataCell(org.knime.core.data.container.BlobDataCell) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) DataRow(org.knime.core.data.DataRow) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException)

Aggregations

BufferedDataTable (org.knime.core.node.BufferedDataTable)425 DataTableSpec (org.knime.core.data.DataTableSpec)213 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)148 DataRow (org.knime.core.data.DataRow)118 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)97 PortObject (org.knime.core.node.port.PortObject)96 DataCell (org.knime.core.data.DataCell)85 DataColumnSpec (org.knime.core.data.DataColumnSpec)61 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)60 DefaultRow (org.knime.core.data.def.DefaultRow)56 PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject)54 RowKey (org.knime.core.data.RowKey)52 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)50 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)47 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)43 IOException (java.io.IOException)41 ExecutionContext (org.knime.core.node.ExecutionContext)40 ArrayList (java.util.ArrayList)33 LinkedHashMap (java.util.LinkedHashMap)31 DoubleValue (org.knime.core.data.DoubleValue)29