Search in sources :

Example 91 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class RuleEngineNodeModel method createRearranger.

/**
 * Creates the rearranger that appends the rule outcome column to the input table.
 *
 * <p>The type of the new column is the common super type of all rule outcomes and of the
 * default label. If that common type exposes only the generic {@code DataValue}, or if no
 * type could be collected at all, the column becomes a string column.</p>
 *
 * @param inSpec the spec of the input table
 * @param rules the rules; for every row they are tried in list order and the first match wins
 * @return a rearranger based on {@code inSpec} with one appended outcome column
 * @throws InvalidSettingsException if the default label is configured as a column reference
 *             but is malformed or names a column that does not exist in {@code inSpec}
 */
private ColumnRearranger createRearranger(final DataTableSpec inSpec, final List<Rule> rules) throws InvalidSettingsException {
    ColumnRearranger crea = new ColumnRearranger(inSpec);
    String newColName = DataTableSpec.getUniqueColumnName(inSpec, m_settings.getNewColName());
    final String defaultLabel = m_settings.getDefaultLabel();
    final int defaultLabelColumnIndex;
    if (m_settings.getDefaultLabelIsColumn()) {
        // a column reference looks like $name$, i.e. needs at least one character between the markers
        if (defaultLabel.length() < 3) {
            throw new InvalidSettingsException("Default label is not a column reference");
        }
        if (!defaultLabel.startsWith("$") || !defaultLabel.endsWith("$")) {
            throw new InvalidSettingsException("Column references in default label must be enclosed in $");
        }
        String colRef = defaultLabel.substring(1, defaultLabel.length() - 1);
        defaultLabelColumnIndex = inSpec.findColumnIndex(colRef);
        if (defaultLabelColumnIndex == -1) {
            throw new InvalidSettingsException("Column '" + defaultLabel + "' for default label does not exist in input table");
        }
    } else {
        defaultLabelColumnIndex = -1;
    }
    // determine output type
    List<DataType> types = new ArrayList<DataType>();
    // add outcome column types
    for (Rule r : rules) {
        Object outcome = r.getOutcome();
        if (outcome instanceof ColumnReference) {
            types.add(((ColumnReference) outcome).spec.getType());
        } else if (outcome instanceof Double) {
            types.add(DoubleCell.TYPE);
        } else if (outcome instanceof Integer) {
            types.add(IntCell.TYPE);
        } else if (outcome.toString().length() > 0) {
            types.add(StringCell.TYPE);
        }
    }
    // add the type of the default label (column type, or int/double/string for a literal)
    if (defaultLabelColumnIndex >= 0) {
        types.add(inSpec.getColumnSpec(defaultLabelColumnIndex).getType());
    } else if (defaultLabel.length() > 0) {
        try {
            Integer.parseInt(defaultLabel);
            types.add(IntCell.TYPE);
        } catch (NumberFormatException ex) {
            try {
                Double.parseDouble(defaultLabel);
                types.add(DoubleCell.TYPE);
            } catch (NumberFormatException ex1) {
                types.add(StringCell.TYPE);
            }
        }
    }
    final DataType outType;
    if (types.size() > 0) {
        DataType temp = types.get(0);
        for (int i = 1; i < types.size(); i++) {
            temp = DataType.getCommonSuperType(temp, types.get(i));
        }
        if ((temp.getValueClasses().size() == 1) && temp.getValueClasses().contains(DataValue.class)) {
            // a non-native type, we replace it with string
            temp = StringCell.TYPE;
        }
        outType = temp;
    } else {
        outType = StringCell.TYPE;
    }
    DataColumnSpec cs = new DataColumnSpecCreator(newColName, outType).createSpec();
    crea.append(new SingleCellFactory(cs) {

        @Override
        public DataCell getCell(final DataRow row) {
            for (Rule r : rules) {
                if (r.matches(row)) {
                    Object outcome = r.getOutcome();
                    if (outcome instanceof ColumnReference) {
                        DataCell cell = row.getCell(((ColumnReference) outcome).index);
                        if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                            return new StringCell(cell.toString());
                        } else {
                            return cell;
                        }
                    } else if (outType.equals(IntCell.TYPE)) {
                        // NOTE(review): an outcome whose string form is empty contributes no type
                        // above and would fail this cast - confirm such rules cannot occur.
                        return new IntCell((Integer) outcome);
                    } else if (outType.equals(DoubleCell.TYPE)) {
                        // The common type is double as soon as Integer and Double outcomes are
                        // mixed, so the outcome may be an Integer here; convert via Number
                        // instead of casting to Double.
                        return new DoubleCell(((Number) outcome).doubleValue());
                    } else {
                        return new StringCell(outcome.toString());
                    }
                }
            }
            // no rule matched: fall back to the default label
            if (defaultLabelColumnIndex >= 0) {
                DataCell cell = row.getCell(defaultLabelColumnIndex);
                // missing cells are passed through unchanged, same as for rule outcome columns
                if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                    return new StringCell(cell.toString());
                } else {
                    return cell;
                }
            } else if (m_settings.getDefaultLabel().length() > 0) {
                String l = m_settings.getDefaultLabel();
                if (outType.equals(StringCell.TYPE)) {
                    return new StringCell(l);
                }
                try {
                    int i = Integer.parseInt(l);
                    return new IntCell(i);
                } catch (NumberFormatException ex) {
                    try {
                        double d = Double.parseDouble(l);
                        return new DoubleCell(d);
                    } catch (NumberFormatException ex1) {
                        return new StringCell(l);
                    }
                }
            } else {
                return DataType.getMissingCell();
            }
        }
    });
    return crea;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataValue(org.knime.core.data.DataValue) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) ColumnReference(org.knime.base.node.rules.Rule.ColumnReference)

Example 92 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class LogRegLearnerNodeDialogPane method createTargetOptionsPanel.

/**
 * Builds the panel with the target-related controls: the target column
 * chooser, the reference category combo box and the check box for keeping
 * the order of the target column domain.
 *
 * @return the assembled panel
 */
private JPanel createTargetOptionsPanel() {
    final JPanel panel = new JPanel(new GridBagLayout());
    final GridBagConstraints gbc = new GridBagConstraints();
    gbc.fill = GridBagConstraints.HORIZONTAL;
    gbc.weightx = 0;
    gbc.weighty = 0;
    gbc.anchor = GridBagConstraints.BASELINE_LEADING;
    gbc.insets = new Insets(5, 5, 0, 0);

    // Row 0: target column chooser.
    gbc.gridx = 0;
    gbc.gridy = 0;
    panel.add(new JLabel("Target Column:"), gbc);
    // Every action on the chooser triggers updateTargetCategories with the
    // currently selected reference category.
    final ActionListener refreshCategories = new ActionListener() {

        @Override
        public void actionPerformed(final ActionEvent e) {
            updateTargetCategories((DataCell) m_targetReferenceCategory.getSelectedItem());
        }
    };
    m_selectionPanel = new ColumnSelectionPanel(new EmptyBorder(0, 0, 0, 0), NominalValue.class);
    m_selectionPanel.addActionListener(refreshCategories);
    gbc.gridx = 1;
    panel.add(m_selectionPanel, gbc);

    // Row 1: reference category.
    gbc.gridx = 0;
    gbc.gridy = 1;
    panel.add(new JLabel("Reference Category:"), gbc);
    m_targetReferenceCategory = new JComboBox();
    gbc.gridx = 1;
    panel.add(m_targetReferenceCategory, gbc);

    // Row 2: check box spanning three grid columns and taking the extra width.
    gbc.gridx = 0;
    gbc.gridy = 2;
    gbc.gridwidth = 3;
    gbc.weightx = 1;
    m_notSortTarget = new JCheckBox("Use order from target column domain (only relevant for output representation)");
    panel.add(m_notSortTarget, gbc);

    // When a column is selected as target, hide exactly that column in the filter panel.
    final ItemListener hideTargetInFilter = new ItemListener() {

        @Override
        public void itemStateChanged(final ItemEvent e) {
            final Object item = e.getItem();
            if (!(item instanceof DataColumnSpec)) {
                return;
            }
            m_filterPanel.resetHiding();
            m_filterPanel.hideColumns((DataColumnSpec) item);
        }
    };
    m_selectionPanel.addItemListener(hideTargetInFilter);
    return panel;
}
Also used : JPanel(javax.swing.JPanel) GridBagConstraints(java.awt.GridBagConstraints) ItemEvent(java.awt.event.ItemEvent) Insets(java.awt.Insets) GridBagLayout(java.awt.GridBagLayout) JComboBox(javax.swing.JComboBox) NominalValue(org.knime.core.data.NominalValue) ActionEvent(java.awt.event.ActionEvent) JLabel(javax.swing.JLabel) JCheckBox(javax.swing.JCheckBox) DataColumnSpec(org.knime.core.data.DataColumnSpec) ActionListener(java.awt.event.ActionListener) DataCell(org.knime.core.data.DataCell) ItemListener(java.awt.event.ItemListener) ColumnSelectionPanel(org.knime.core.node.util.ColumnSelectionPanel) EmptyBorder(javax.swing.border.EmptyBorder)

Example 93 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class LogRegLearner method checkConstantLearningFields.

/**
 * Looks for numeric learning columns whose domain lower and upper bound are
 * equal. If any are found, a warning is logged and appended to the stored
 * warning message, and the learner is re-initialized with those columns
 * excluded.
 *
 * @param data the input table; its spec is used for the re-initialization
 * @param inPMMLSpec the PMML spec handed on to {@code init}
 * @throws InvalidSettingsException if the re-initialization fails
 */
private void checkConstantLearningFields(final BufferedDataTable data, final PMMLPortObjectSpec inPMMLSpec) throws InvalidSettingsException {
    final Set<String> constantColumns = new HashSet<String>();
    for (final DataColumnSpec learningCol : m_pmmlOutSpec.getLearningCols()) {
        if (!learningCol.getType().isCompatible(DoubleValue.class)) {
            // only double-compatible columns are checked
            continue;
        }
        final DataColumnDomain domain = learningCol.getDomain();
        final DataCell lower = domain.getLowerBound();
        final DataCell upper = domain.getUpperBound();
        assert lower != null || data.size() == 0 : "Non empty table must have domain set at this point";
        if (ObjectUtils.equals(lower, upper)) {
            constantColumns.add(learningCol.getName());
        }
    }
    if (constantColumns.isEmpty()) {
        return;
    }
    final boolean single = constantColumns.size() == 1;
    final String warning = new StringBuilder()
        .append(single ? "Column " : "Columns ")
        .append(ConvenienceMethods.getShortStringFrom(constantColumns, 5))
        .append(single ? " has a constant value " : " have constant values ")
        .append(" - will be ignored during training")
        .toString();
    LOGGER.warn(warning);
    // append to an already existing warning on a new line
    final String previous = m_warningMessage == null ? "" : m_warningMessage + "\n";
    m_warningMessage = previous + warning;
    // re-init learner so that it has the correct learning columns
    init(data.getDataTableSpec(), inPMMLSpec, constantColumns);
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataCell(org.knime.core.data.DataCell) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 94 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class LogRegLearner method init.

/**
 * Initialize instance and check if settings are consistent.
 *
 * <p>Determines the target column and the regressor columns from the
 * settings and the input spec, validates their types, and creates the PMML
 * output spec and the learner.</p>
 *
 * @param inSpec the spec of the input table
 * @param pmmlSpec the incoming PMML spec, handed to the PMML spec creator
 * @param exclude names of columns that must not be used as learning columns
 * @throws InvalidSettingsException if no learning column remains, if no
 *             target column can be determined or found in {@code inSpec},
 *             if the target or a learning column has an unsupported type, or
 *             if the target domain lists fewer than two values
 */
private void init(final DataTableSpec inSpec, final PMMLPortObjectSpec pmmlSpec, final Set<String> exclude) throws InvalidSettingsException {
    List<String> inputCols = new ArrayList<String>();
    for (DataColumnSpec column : inSpec) {
        inputCols.add(column.getName());
    }
    if (!m_settings.getIncludeAll()) {
        List<String> included = Arrays.asList(m_settings.getIncludedColumns());
        if (!inputCols.containsAll(included)) {
            LOGGER.warn("Input does not contain all learning columns. Proceed with the remaining learning columns.");
        }
        inputCols.retainAll(included);
    }
    inputCols.remove(m_settings.getTargetColumn());
    if (inputCols.isEmpty()) {
        throw new InvalidSettingsException("At least one column must be included.");
    }
    // Auto configuration when target is not set
    if (null == m_settings.getTargetColumn() && m_settings.getIncludeAll()) {
        for (int i = 0; i < inSpec.getNumColumns(); i++) {
            DataColumnSpec colSpec = inSpec.getColumnSpec(i);
            String colName = colSpec.getName();
            // NOTE(review): this removes every column name from inputCols, not
            // only the chosen target, so no regressors remain after auto
            // configuration - confirm this is intended.
            inputCols.remove(colName);
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                // the last nominal column in the spec ends up as target
                m_settings.setTargetColumn(colName);
            }
        }
        // when there is no column with nominal data
        if (null == m_settings.getTargetColumn()) {
            throw new InvalidSettingsException("No column in spec compatible to \"NominalValue\".");
        }
    }
    // remove all columns that should not be used
    inputCols.removeAll(exclude);
    // May still be null when no target is configured and not all columns are
    // included; the comparison below is written null-safe so that this case
    // ends in the InvalidSettingsException at the bottom instead of a
    // NullPointerException.
    final String targetColumn = m_settings.getTargetColumn();
    DataColumnSpec targetColSpec = null;
    List<DataColumnSpec> regressorColSpecs = new ArrayList<DataColumnSpec>();
    for (int i = 0; i < inSpec.getNumColumns(); i++) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(i);
        String colName = colSpec.getName();
        if (colName.equals(targetColumn)) {
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                targetColSpec = colSpec;
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not nominal.");
            }
        } else if (inputCols.contains(colName)) {
            if (colSpec.getType().isCompatible(DoubleValue.class) || colSpec.getType().isCompatible(NominalValue.class)) {
                regressorColSpecs.add(colSpec);
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not one of the allowed types, which are numeric or nominal.");
            }
        }
    }
    if (null == targetColSpec) {
        throw new InvalidSettingsException("The target is not in the input.");
    }
    // Check if target has at least two categories (only possible when the
    // domain lists its values).
    final Set<DataCell> targetValues = targetColSpec.getDomain().getValues();
    if (targetValues != null && targetValues.size() < 2) {
        throw new InvalidSettingsException("The target column \"" + targetColSpec.getName() + "\" has one value, only. At least two target categories are expected.");
    }
    PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(pmmlSpec, inSpec);
    creator.setTargetCols(Arrays.asList(targetColSpec));
    creator.setLearningCols(regressorColSpecs);
    m_pmmlOutSpec = creator.createSpec();
    m_learner = new Learner(m_pmmlOutSpec, m_settings.getTargetReferenceCategory(), m_settings.getSortTargetCategories(), m_settings.getSortIncludesCategories());
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Example 95 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class Learner method perform.

/**
 * Fits the model by iterating until convergence or until {@code m_maxIter}
 * iterations have been run, then packs the result into a
 * {@link LogisticRegressionContent}.
 *
 * <p>Each iteration runs {@code irlsRls} and {@code likelihood} as a task on
 * the current thread pool and waits for its result. If the log-likelihood
 * decreased compared to the previous iteration, the step is repeatedly
 * halved (beta is moved to the midpoint between itself and the previous
 * beta) until the likelihood no longer decreases or beta no longer
 * changes.</p>
 *
 * @param data The data table.
 * @param exec The execution context used for reporting progress.
 * @return An object which holds the results.
 * @throws CanceledExecutionException when method is cancelled
 * @throws InvalidSettingsException When settings are inconsistent with the data
 * @throws RuntimeException with {@code FAILING_MSG} if the log-likelihood of an
 *             iteration is infinite or NaN, or wrapping the cause if the
 *             background task fails or the wait for it is interrupted
 */
public LogisticRegressionContent perform(final BufferedDataTable data, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    exec.checkCanceled();
    int iter = 0;
    boolean converged = false;
    final RegressionTrainingData trainingData = new RegressionTrainingData(data, m_outSpec, m_specialColumns, true, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories);
    int targetIndex = data.getDataTableSpec().findColumnIndex(m_outSpec.getTargetCols().get(0).getName());
    // tcC: number of domain values of the target column, rC: number of regressors
    final int tcC = trainingData.getDomainValues().get(targetIndex).size();
    final int rC = trainingData.getRegressorCount();
    // beta is a single row with (tcC - 1) * (rC + 1) columns
    final RealMatrix beta = new Array2DRowRealMatrix(1, (tcC - 1) * (rC + 1));
    Double loglike = 0.0;
    Double loglikeOld = 0.0;
    exec.setMessage("Iterative optimization. Processing iteration 1.");
    // main loop
    while (iter < m_maxIter && !converged) {
        // keep the previous state for the step-halving and convergence checks below
        RealMatrix betaOld = beta.copy();
        loglikeOld = loglike;
        // Do heavy work in a separate thread which allows to interrupt it
        // note the queue may block if no more threads are available (e.g. thread count = 1)
        // as soon as we stall in 'get' this thread reduces the number of running thread
        Future<Double> future = ThreadPool.currentPool().enqueue(new Callable<Double>() {

            @Override
            public Double call() throws Exception {
                // each iteration gets an equal share of the overall progress
                final ExecutionMonitor progMon = exec.createSubProgress(1.0 / m_maxIter);
                irlsRls(trainingData, beta, rC, tcC, progMon);
                progMon.setProgress(1.0);
                return likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            }
        });
        try {
            loglike = future.get();
        } catch (InterruptedException e) {
            future.cancel(true);
            // presumably throws CanceledExecutionException if the user cancelled; otherwise
            // the interruption is reported as a RuntimeException
            exec.checkCanceled();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            // unwrap the failure of the background task
            if (e.getCause() instanceof RuntimeException) {
                throw (RuntimeException) e.getCause();
            } else {
                throw new RuntimeException(e.getCause());
            }
        }
        if (Double.isInfinite(loglike) || Double.isNaN(loglike)) {
            throw new RuntimeException(FAILING_MSG);
        }
        exec.checkCanceled();
        // test for decreasing likelihood
        // (skipped in the very first iteration because of 'iter > 0')
        while ((Double.isInfinite(loglike) || Double.isNaN(loglike) || loglike < loglikeOld) && iter > 0) {
            // NOTE(review): beta is created with exactly one row above, so this loop over
            // getRowDimension() only compares entry (0, 0) - confirm whether all
            // coefficients (columns) were meant to be checked.
            converged = true;
            for (int k = 0; k < beta.getRowDimension(); k++) {
                if (abs(beta.getEntry(k, 0) - betaOld.getEntry(k, 0)) > m_eps * abs(betaOld.getEntry(k, 0))) {
                    converged = false;
                    break;
                }
            }
            if (converged) {
                break;
            }
            // half the step size of beta
            beta.setSubMatrix((beta.add(betaOld)).scalarMultiply(0.5).getData(), 0, 0);
            exec.checkCanceled();
            loglike = likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            exec.checkCanceled();
        }
        // test for convergence
        // relative change of beta compared to the previous iteration, threshold m_eps
        // NOTE(review): same single-row iteration as in the step-halving loop above.
        converged = true;
        for (int k = 0; k < beta.getRowDimension(); k++) {
            if (abs(beta.getEntry(k, 0) - betaOld.getEntry(k, 0)) > m_eps * abs(betaOld.getEntry(k, 0))) {
                converged = false;
                break;
            }
        }
        iter++;
        LOGGER.debug("#Iterations: " + iter);
        LOGGER.debug("Log Likelihood: " + loglike);
        // comma separated dump of beta for the debug log (iterates over rows, column 0)
        StringBuilder betaBuilder = new StringBuilder();
        for (int i = 0; i < beta.getRowDimension() - 1; i++) {
            betaBuilder.append(Double.toString(beta.getEntry(i, 0)));
            betaBuilder.append(", ");
        }
        if (beta.getRowDimension() > 0) {
            betaBuilder.append(Double.toString(beta.getEntry(beta.getRowDimension() - 1, 0)));
        }
        LOGGER.debug("beta: " + betaBuilder.toString());
        exec.checkCanceled();
        exec.setMessage("Iterative optimization. #Iterations: " + iter + " | Log-likelihood: " + DoubleFormat.formatDouble(loglike) + ". Processing iteration " + (iter + 1) + ".");
    }
    // The covariance matrix
    // 'A' is not declared in this method - presumably a field filled by irlsRls; TODO confirm
    RealMatrix covMat = new QRDecomposition(A).getSolver().getInverse().scalarMultiply(-1);
    // split the active columns into factors (nominal) and covariates (everything else)
    List<String> factorList = new ArrayList<String>();
    List<String> covariateList = new ArrayList<String>();
    // NOTE(review): filled below but not passed to the content object created at the end
    Map<String, List<DataCell>> factorDomainValues = new HashMap<String, List<DataCell>>();
    for (int i : trainingData.getActiveCols()) {
        DataColumnSpec columnSpec = data.getDataTableSpec().getColumnSpec(i);
        if (trainingData.getIsNominal().get(i)) {
            String factor = columnSpec.getName();
            factorList.add(factor);
            List<DataCell> values = trainingData.getDomainValues().get(i);
            factorDomainValues.put(factor, values);
        } else {
            if (columnSpec.getType().isCompatible(BitVectorValue.class) || columnSpec.getType().isCompatible(ByteVectorValue.class)) {
                // vector columns contribute one covariate per position, named "column[j]"
                int length = trainingData.getVectorLengths().getOrDefault(i, 0).intValue();
                for (int j = 0; j < length; ++j) {
                    covariateList.add(columnSpec.getName() + "[" + j + "]");
                }
            } else {
                covariateList.add(columnSpec.getName());
            }
        }
    }
    // translate the vector lengths from column index to column name for the special columns
    final Map<? extends Integer, Integer> vectorIndexLengths = trainingData.getVectorLengths();
    final Map<String, Integer> vectorLengths = new LinkedHashMap<String, Integer>();
    for (DataColumnSpec spec : m_specialColumns) {
        int colIndex = data.getSpec().findColumnIndex(spec.getName());
        if (colIndex >= 0) {
            vectorLengths.put(spec.getName(), vectorIndexLengths.get(colIndex));
        }
    }
    // create content
    LogisticRegressionContent content = new LogisticRegressionContent(m_outSpec, factorList, covariateList, vectorLengths, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories, beta, loglike, covMat, iter);
    return content;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RegressionTrainingData(org.knime.base.node.mine.regression.RegressionTrainingData) ArrayList(java.util.ArrayList) List(java.util.List) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ExecutionException(java.util.concurrent.ExecutionException) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ExecutionException(java.util.concurrent.ExecutionException) QRDecomposition(org.apache.commons.math3.linear.QRDecomposition) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DataCell(org.knime.core.data.DataCell) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Aggregations

DataCell (org.knime.core.data.DataCell)780 DataRow (org.knime.core.data.DataRow)268 DataTableSpec (org.knime.core.data.DataTableSpec)175 DataColumnSpec (org.knime.core.data.DataColumnSpec)170 DefaultRow (org.knime.core.data.def.DefaultRow)169 ArrayList (java.util.ArrayList)141 StringCell (org.knime.core.data.def.StringCell)131 DoubleCell (org.knime.core.data.def.DoubleCell)129 DoubleValue (org.knime.core.data.DoubleValue)111 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)109 DataType (org.knime.core.data.DataType)97 RowKey (org.knime.core.data.RowKey)94 BufferedDataTable (org.knime.core.node.BufferedDataTable)93 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)91 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)84 LinkedHashMap (java.util.LinkedHashMap)81 IntCell (org.knime.core.data.def.IntCell)79 HashMap (java.util.HashMap)60 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)57 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)56