Search in sources :

Example 91 with DataCell

use of DataCell in project knime-core by knime.

the class RuleEngineNodeModel method createRearranger.

/**
 * Builds a {@link ColumnRearranger} over {@code inSpec} that appends one new column whose cells
 * are computed per row from {@code rules} and, if no rule matches, from the configured default label.
 *
 * The type of the appended column is the common super type of all collected outcome types; it is
 * replaced by {@code StringCell.TYPE} when that super type only has {@code DataValue} as value
 * class, and it is {@code StringCell.TYPE} when no types were collected at all.
 *
 * NOTE(review): this excerpt has lost its closing braces and several statements (for example the
 * bodies of the branches in the "determine output type" step). The comments below describe only
 * what is still visible.
 *
 * @param inSpec spec of the input table; used to resolve the default-label column reference and
 *            as base for the new column's name
 * @param rules the rules evaluated for every row; their outcomes also contribute to the output type
 * @return the rearranger with the new column appended
 * @throws InvalidSettingsException if the default label is flagged as a column reference but is
 *             shorter than three characters, is not enclosed in {@code $}, or names a column that
 *             is not found in {@code inSpec}
 */
private ColumnRearranger createRearranger(final DataTableSpec inSpec, final List<Rule> rules) throws InvalidSettingsException {
    ColumnRearranger crea = new ColumnRearranger(inSpec);
    // name of the appended column, obtained through DataTableSpec.getUniqueColumnName
    String newColName = DataTableSpec.getUniqueColumnName(inSpec, m_settings.getNewColName());
    final int defaultLabelColumnIndex;
    if (m_settings.getDefaultLabelIsColumn()) {
        // a column reference needs at least "$x$", i.e. three characters
        if (m_settings.getDefaultLabel().length() < 3) {
            throw new InvalidSettingsException("Default label is not a column reference");
        if (!m_settings.getDefaultLabel().startsWith("$") || !m_settings.getDefaultLabel().endsWith("$")) {
            throw new InvalidSettingsException("Column references in default label must be enclosed in $");
        // strip the leading and trailing '$' to get the bare column name
        String colRef = m_settings.getDefaultLabel().substring(1, m_settings.getDefaultLabel().length() - 1);
        defaultLabelColumnIndex = inSpec.findColumnIndex(colRef);
        if (defaultLabelColumnIndex == -1) {
            throw new InvalidSettingsException("Column '" + m_settings.getDefaultLabel() + "' for default label does not exist in input table");
    } else {
        // -1 marks "default label is not a column"; later code tests for >= 0
        defaultLabelColumnIndex = -1;
    // determine output type
    List<DataType> types = new ArrayList<DataType>();
    // add outcome column types
    for (Rule r : rules) {
        if (r.getOutcome() instanceof ColumnReference) {
            types.add(((ColumnReference) r.getOutcome()).spec.getType());
        // NOTE(review): the bodies of the following branches are missing from this excerpt
        } else if (r.getOutcome() instanceof Double) {
        } else if (r.getOutcome() instanceof Integer) {
        } else if (r.getOutcome().toString().length() > 0) {
    // NOTE(review): the default-label handling below is also missing its statements in this excerpt
    if (defaultLabelColumnIndex >= 0) {
    } else if (m_settings.getDefaultLabel().length() > 0) {
        try {
        } catch (NumberFormatException ex) {
            try {
            } catch (NumberFormatException ex1) {
    final DataType outType;
    if (types.size() > 0) {
        // fold all collected types into one common super type
        DataType temp = types.get(0);
        for (int i = 1; i < types.size(); i++) {
            temp = DataType.getCommonSuperType(temp, types.get(i));
        if ((temp.getValueClasses().size() == 1) && temp.getValueClasses().contains(DataValue.class)) {
            // a non-native type, we replace it with string
            temp = StringCell.TYPE;
        outType = temp;
    } else {
        outType = StringCell.TYPE;
    DataColumnSpec cs = new DataColumnSpecCreator(newColName, outType).createSpec();
    crea.append(new SingleCellFactory(cs) {

        public DataCell getCell(final DataRow row) {
            // every visible branch inside the loop returns, so the first matching rule decides the cell
            for (Rule r : rules) {
                if (r.matches(row)) {
                    Object outcome = r.getOutcome();
                    if (outcome instanceof ColumnReference) {
                        DataCell cell = row.getCell(((ColumnReference) outcome).index);
                        // string output column but a non-missing, non-string cell: convert via toString()
                        if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                            return new StringCell(cell.toString());
                        } else {
                            return cell;
                    } else if (outType.equals(IntCell.TYPE)) {
                        return new IntCell((Integer) outcome);
                    } else if (outType.equals(DoubleCell.TYPE)) {
                        return new DoubleCell((Double) outcome);
                    } else {
                        return new StringCell(outcome.toString());
            // reached when no rule returned a cell: fall back to the default label
            if (defaultLabelColumnIndex >= 0) {
                DataCell cell = row.getCell(defaultLabelColumnIndex);
                // NOTE(review): unlike the rule-outcome branch above, this does not test
                // cell.isMissing() before converting with toString() - confirm this is intended
                if (outType.equals(StringCell.TYPE) && !cell.getType().equals(StringCell.TYPE)) {
                    return new StringCell(cell.toString());
                } else {
                    return cell;
            } else if (m_settings.getDefaultLabel().length() > 0) {
                String l = m_settings.getDefaultLabel();
                if (outType.equals(StringCell.TYPE)) {
                    return new StringCell(l);
                // non-string output type: try int first, then double, finally fall back to a string cell
                try {
                    int i = Integer.parseInt(l);
                    return new IntCell(i);
                } catch (NumberFormatException ex) {
                    try {
                        double d = Double.parseDouble(l);
                        return new DoubleCell(d);
                    } catch (NumberFormatException ex1) {
                        return new StringCell(l);
            } else {
                // default label is neither a column nor a non-empty string
                return DataType.getMissingCell();
    return crea;
Also used : DataColumnSpecCreator( DataValue( DoubleCell( ArrayList(java.util.ArrayList) DataRow( IntCell( ColumnRearranger( DataColumnSpec( InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell( DataType( DataCell( SingleCellFactory( ColumnReference(org.knime.base.node.rules.Rule.ColumnReference)

Example 92 with DataCell

use of DataCell in project knime-core by knime.

the class LogRegLearnerNodeDialogPane method createTargetOptionsPanel.

/**
 * Create options panel for the target.
 */
// Builds and returns a JPanel with GridBagLayout holding the target column selector
// (m_selectionPanel), the reference category combo box (m_targetReferenceCategory) and the
// m_notSortTarget check box, and wires up the listeners on the column selector.
// NOTE(review): this excerpt has lost its closing braces and apparently some statements
// (e.g. constraint updates between the add(...) calls); comments describe only what is visible.
private JPanel createTargetOptionsPanel() {
    JPanel p = new JPanel(new GridBagLayout());
    // one constraints object is reused and mutated for each component that is added
    GridBagConstraints c = new GridBagConstraints();
    c.fill = GridBagConstraints.HORIZONTAL;
    c.weightx = 0;
    c.weighty = 0;
    c.gridx = 0;
    c.gridy = 0;
    c.anchor = GridBagConstraints.BASELINE_LEADING;
    c.insets = new Insets(5, 5, 0, 0);
    p.add(new JLabel("Target Column:"), c);
    // column selector created with NominalValue.class as its value-class argument
    m_selectionPanel = new ColumnSelectionPanel(new EmptyBorder(0, 0, 0, 0), NominalValue.class);
    m_selectionPanel.addActionListener(new ActionListener() {

        public void actionPerformed(final ActionEvent e) {
            // refresh the target categories, passing the currently selected reference category
            updateTargetCategories((DataCell) m_targetReferenceCategory.getSelectedItem());
    p.add(m_selectionPanel, c);
    c.gridx = 0;
    p.add(new JLabel("Reference Category:"), c);
    m_targetReferenceCategory = new JComboBox();
    p.add(m_targetReferenceCategory, c);
    // the check box spans three grid columns and takes the extra horizontal space
    c.gridx = 0;
    c.gridwidth = 3;
    c.weightx = 1;
    m_notSortTarget = new JCheckBox("Use order from target column domain (only relevant for output representation)");
    p.add(m_notSortTarget, c);
    m_selectionPanel.addItemListener(new ItemListener() {

        public void itemStateChanged(final ItemEvent e) {
            Object selected = e.getItem();
            // when the event item is a column spec, hand it to m_filterPanel.hideColumns(...)
            if (selected instanceof DataColumnSpec) {
                m_filterPanel.hideColumns((DataColumnSpec) selected);
    return p;
Also used : JPanel(javax.swing.JPanel) GridBagConstraints(java.awt.GridBagConstraints) ItemEvent(java.awt.event.ItemEvent) Insets(java.awt.Insets) GridBagLayout(java.awt.GridBagLayout) JComboBox(javax.swing.JComboBox) NominalValue( ActionEvent(java.awt.event.ActionEvent) JLabel(javax.swing.JLabel) JCheckBox(javax.swing.JCheckBox) DataColumnSpec( ActionListener(java.awt.event.ActionListener) DataCell( ItemListener(java.awt.event.ItemListener) ColumnSelectionPanel(org.knime.core.node.util.ColumnSelectionPanel) EmptyBorder(javax.swing.border.EmptyBorder)

Example 93 with DataCell

use of DataCell in project knime-core by knime.

the class LogRegLearner method checkConstantLearningFields.

/**
 * Inspects the learning columns of {@code m_pmmlOutSpec} that are compatible with
 * {@code DoubleValue}, comparing each column's domain lower and upper bound. If the
 * {@code exclude} set is non-empty afterwards, a warning is appended to
 * {@code m_warningMessage} and the learner is re-initialized through
 * {@link #init(DataTableSpec, PMMLPortObjectSpec, Set)} with that set.
 *
 * NOTE(review): this excerpt has lost its closing braces and the statement executed when both
 * bounds are equal; presumably the column name is added to {@code exclude} there - confirm
 * against the full source.
 *
 * @param data the input table; its size is used in the assertion and its spec is passed to init
 * @param inPMMLSpec forwarded unchanged to init
 * @throws InvalidSettingsException declared by this method; init declares the same exception
 */
private void checkConstantLearningFields(final BufferedDataTable data, final PMMLPortObjectSpec inPMMLSpec) throws InvalidSettingsException {
    Set<String> exclude = new HashSet<String>();
    for (DataColumnSpec colSpec : m_pmmlOutSpec.getLearningCols()) {
        if (colSpec.getType().isCompatible(DoubleValue.class)) {
            final DataColumnDomain domain = colSpec.getDomain();
            final DataCell lowerBound = domain.getLowerBound();
            final DataCell upperBound = domain.getUpperBound();
            // a missing lower bound is only tolerated for an empty table
            assert lowerBound != null || data.size() == 0 : "Non empty table must have domain set at this point";
            // equal lower and upper bound: the column's domain spans a single value
            if (ObjectUtils.equals(lowerBound, upperBound)) {
    if (!exclude.isEmpty()) {
        StringBuilder warning = new StringBuilder();
        // singular or plural wording depending on the number of excluded columns
        warning.append(exclude.size() == 1 ? "Column " : "Columns ");
        warning.append(ConvenienceMethods.getShortStringFrom(exclude, 5));
        // NOTE(review): the next two literals join with a double space ("... value  - will ...")
        warning.append(exclude.size() == 1 ? " has a constant value " : " have constant values ");
        warning.append(" - will be ignored during training");
        // append to an existing warning on a new line, or start a fresh one
        m_warningMessage = (m_warningMessage == null ? "" : m_warningMessage + "\n") + warning.toString();
        // re-init learner so that it has the correct learning columns
        init(data.getDataTableSpec(), inPMMLSpec, exclude);
Also used : DataColumnSpec( DataColumnDomain( DataCell( HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 94 with DataCell

use of DataCell in project knime-core by knime.

the class LogRegLearner method init.

/**
 * Initialize instance and check if settings are consistent.
 */
// Validates the settings against inSpec, resolves the target column spec and, when a target is
// found, creates m_pmmlOutSpec (via PMMLPortObjectSpecCreator) and m_learner. Throws
// InvalidSettingsException when no input column remains, no nominal column exists for
// auto-configuration, a selected column has an unsupported type, the target's domain lists
// fewer than two values, or the target is not in the input.
// NOTE(review): this excerpt has lost its closing braces and several statements (e.g. the body
// of the first loop and any use of the "exclude" parameter); comments describe only what is visible.
private void init(final DataTableSpec inSpec, final PMMLPortObjectSpec pmmlSpec, final Set<String> exclude) throws InvalidSettingsException {
    List<String> inputCols = new ArrayList<String>();
    // NOTE(review): the loop body is missing here; presumably it fills inputCols - confirm
    for (DataColumnSpec column : inSpec) {
    if (!m_settings.getIncludeAll()) {
        List<String> included = Arrays.asList(m_settings.getIncludedColumns());
        // missing learning columns only produce a log warning, not an exception
        if (!inputCols.containsAll(included)) {
            LOGGER.warn("Input does not contain all learning columns. " + "Proceed with the remaining learning columns.");
    if (inputCols.isEmpty()) {
        throw new InvalidSettingsException("At least one column must " + "be included.");
    DataColumnSpec targetColSpec = null;
    List<DataColumnSpec> regressorColSpecs = new ArrayList<DataColumnSpec>();
    // Auto configuration when target is not set
    if (null == m_settings.getTargetColumn() && m_settings.getIncludeAll()) {
        for (int i = 0; i < inSpec.getNumColumns(); i++) {
            DataColumnSpec colSpec = inSpec.getColumnSpec(i);
            String colName = colSpec.getName();
            if (colSpec.getType().isCompatible(NominalValue.class)) {
        // when there is no column with nominal data
        if (null == m_settings.getTargetColumn()) {
            throw new InvalidSettingsException("No column in " + "spec compatible to \"NominalValue\".");
    // remove all columns that should not be used
    for (int i = 0; i < inSpec.getNumColumns(); i++) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(i);
        String colName = colSpec.getName();
        // NOTE(review): getTargetColumn() is dereferenced here; in the visible code it can still be
        // null when getIncludeAll() is false (auto configuration above is skipped) - confirm
        if (m_settings.getTargetColumn().equals(colName)) {
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                targetColSpec = colSpec;
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not nominal.");
        } else if (inputCols.contains(colName)) {
            // learning columns must be numeric (DoubleValue) or nominal
            if (colSpec.getType().isCompatible(DoubleValue.class) || colSpec.getType().isCompatible(NominalValue.class)) {
            } else {
                // NOTE(review): "nomial" in the message below looks like a typo for "nominal"
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not one of the allowed types, " + "which are numeric or nomial.");
    if (null != targetColSpec) {
        // Check if target has at least two categories.
        final Set<DataCell> targetValues = targetColSpec.getDomain().getValues();
        // a null value set (no domain values available) is not rejected here
        // NOTE(review): the message says "has one value" but the condition also covers zero values
        if (targetValues != null && targetValues.size() < 2) {
            throw new InvalidSettingsException("The target column \"" + targetColSpec.getName() + "\" has one value, only. " + "At least two target categories are expected.");
        // regressor column names first, target column name in the last slot
        // NOTE(review): learnerCols is not used in the lines visible in this excerpt
        String[] learnerCols = new String[regressorColSpecs.size() + 1];
        for (int i = 0; i < regressorColSpecs.size(); i++) {
            learnerCols[i] = regressorColSpecs.get(i).getName();
        learnerCols[learnerCols.length - 1] = targetColSpec.getName();
        PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(pmmlSpec, inSpec);
        m_pmmlOutSpec = creator.createSpec();
        m_learner = new Learner(m_pmmlOutSpec, m_settings.getTargetReferenceCategory(), m_settings.getSortTargetCategories(), m_settings.getSortIncludesCategories());
    } else {
        throw new InvalidSettingsException("The target is " + "not in the input.");
Also used : DataColumnSpec( InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ArrayList(java.util.ArrayList) DataCell( PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Example 95 with DataCell

use of DataCell in project knime-core by knime.

the class Learner method perform.

/**
 * @param data The data table.
 * @param exec The execution context used for reporting progress.
 * @return An object which holds the results.
 * @throws CanceledExecutionException when method is cancelled
 * @throws InvalidSettingsException When settings are inconsistent with the data
 */
// Runs the iterative optimization: per iteration, irlsRls(...) and likelihood(...) are executed on
// a pool thread, the log-likelihood is checked, beta is averaged with its previous value while
// the likelihood decreases, and convergence is tested against m_eps. Afterwards the factor and
// covariate lists and the vector lengths are collected and a LogisticRegressionContent is returned.
// NOTE(review): this excerpt has lost its closing braces and several statements; in particular no
// increment of "iter" and no definition of "A" are visible. Comments describe only what is visible.
public LogisticRegressionContent perform(final BufferedDataTable data, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    int iter = 0;
    boolean converged = false;
    final RegressionTrainingData trainingData = new RegressionTrainingData(data, m_outSpec, m_specialColumns, true, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories);
    int targetIndex = data.getDataTableSpec().findColumnIndex(m_outSpec.getTargetCols().get(0).getName());
    // tcC: number of domain values of the target column; rC: regressor count
    final int tcC = trainingData.getDomainValues().get(targetIndex).size();
    final int rC = trainingData.getRegressorCount();
    // beta is a single-row matrix with (tcC - 1) * (rC + 1) columns
    final RealMatrix beta = new Array2DRowRealMatrix(1, (tcC - 1) * (rC + 1));
    Double loglike = 0.0;
    Double loglikeOld = 0.0;
    exec.setMessage("Iterative optimization. Processing iteration 1.");
    // main loop
    while (iter < m_maxIter && !converged) {
        // keep the previous state for the step-halving and convergence checks below
        RealMatrix betaOld = beta.copy();
        loglikeOld = loglike;
        // Do heavy work in a separate thread which allows to interrupt it
        // note the queue may block if no more threads are available (e.g. thread count = 1)
        // as soon as we stall in 'get' this thread reduces the number of running thread
        Future<Double> future = ThreadPool.currentPool().enqueue(new Callable<Double>() {

            public Double call() throws Exception {
                // each iteration gets a 1 / m_maxIter share of the progress
                final ExecutionMonitor progMon = exec.createSubProgress(1.0 / m_maxIter);
                irlsRls(trainingData, beta, rC, tcC, progMon);
                return likelihood(trainingData.iterator(), beta, rC, tcC, exec);
        try {
            loglike = future.get();
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt is wrapped in a RuntimeException without restoring the
            // thread's interrupt flag - confirm this is intended
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            // unwrap the cause; runtime exceptions are rethrown as they are
            if (e.getCause() instanceof RuntimeException) {
                throw (RuntimeException) e.getCause();
            } else {
                throw new RuntimeException(e.getCause());
        if (Double.isInfinite(loglike) || Double.isNaN(loglike)) {
            throw new RuntimeException(FAILING_MSG);
        // test for decreasing likelihood
        while ((Double.isInfinite(loglike) || Double.isNaN(loglike) || loglike < loglikeOld) && iter > 0) {
            converged = true;
            // NOTE(review): beta is created with one row, yet this loop iterates over
            // getRowDimension() and reads getEntry(k, 0), so only entry (0, 0) is compared -
            // confirm whether the column dimension was intended
            for (int k = 0; k < beta.getRowDimension(); k++) {
                if (abs(beta.getEntry(k, 0) - betaOld.getEntry(k, 0)) > m_eps * abs(betaOld.getEntry(k, 0))) {
                    converged = false;
            if (converged) {
            // half the step size of beta
            beta.setSubMatrix((beta.add(betaOld)).scalarMultiply(0.5).getData(), 0, 0);
            loglike = likelihood(trainingData.iterator(), beta, rC, tcC, exec);
        // test for convergence
        converged = true;
        // NOTE(review): same row-dimension iteration as in the step-halving loop above
        for (int k = 0; k < beta.getRowDimension(); k++) {
            if (abs(beta.getEntry(k, 0) - betaOld.getEntry(k, 0)) > m_eps * abs(betaOld.getEntry(k, 0))) {
                converged = false;
        LOGGER.debug("#Iterations: " + iter);
        LOGGER.debug("Log Likelihood: " + loglike);
        // build a comma-separated string of beta entries for the debug log
        StringBuilder betaBuilder = new StringBuilder();
        for (int i = 0; i < beta.getRowDimension() - 1; i++) {
            betaBuilder.append(Double.toString(beta.getEntry(i, 0)));
            betaBuilder.append(", ");
        if (beta.getRowDimension() > 0) {
            betaBuilder.append(Double.toString(beta.getEntry(beta.getRowDimension() - 1, 0)));
        LOGGER.debug("beta: " + betaBuilder.toString());
        exec.setMessage("Iterative optimization. #Iterations: " + iter + " | Log-likelihood: " + DoubleFormat.formatDouble(loglike) + ". Processing iteration " + (iter + 1) + ".");
    // The covariance matrix
    // computed as the inverse of A (via QR decomposition) multiplied by -1;
    // NOTE(review): "A" is not defined in the lines visible in this excerpt
    RealMatrix covMat = new QRDecomposition(A).getSolver().getInverse().scalarMultiply(-1);
    List<String> factorList = new ArrayList<String>();
    List<String> covariateList = new ArrayList<String>();
    Map<String, List<DataCell>> factorDomainValues = new HashMap<String, List<DataCell>>();
    for (int i : trainingData.getActiveCols()) {
        DataColumnSpec columnSpec = data.getDataTableSpec().getColumnSpec(i);
        if (trainingData.getIsNominal().get(i)) {
            // nominal column: remember its domain values under the column name
            String factor = columnSpec.getName();
            List<DataCell> values = trainingData.getDomainValues().get(i);
            factorDomainValues.put(factor, values);
        } else {
            if (columnSpec.getType().isCompatible(BitVectorValue.class) || columnSpec.getType().isCompatible(ByteVectorValue.class)) {
                // vector column: one covariate "name[j]" per position; length falls back to 0
                int length = trainingData.getVectorLengths().getOrDefault(i, 0).intValue();
                for (int j = 0; j < length; ++j) {
                    covariateList.add(columnSpec.getName() + "[" + j + "]");
            } else {
    // translate the index-keyed vector lengths into a name-keyed map for the special columns
    final Map<? extends Integer, Integer> vectorIndexLengths = trainingData.getVectorLengths();
    final Map<String, Integer> vectorLengths = new LinkedHashMap<String, Integer>();
    for (DataColumnSpec spec : m_specialColumns) {
        int colIndex = data.getSpec().findColumnIndex(spec.getName());
        // only columns that findColumnIndex located (index >= 0) are recorded
        if (colIndex >= 0) {
            vectorLengths.put(spec.getName(), vectorIndexLengths.get(colIndex));
    // create content
    LogisticRegressionContent content = new LogisticRegressionContent(m_outSpec, factorList, covariateList, vectorLengths, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories, beta, loglike, covMat, iter);
    return content;
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ByteVectorValue( LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec( Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RegressionTrainingData(org.knime.base.node.mine.regression.RegressionTrainingData) ArrayList(java.util.ArrayList) List(java.util.List) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ExecutionException(java.util.concurrent.ExecutionException) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ExecutionException(java.util.concurrent.ExecutionException) QRDecomposition(org.apache.commons.math3.linear.QRDecomposition) Array2DRowRealMatrix(org.apache.commons.math3.linear.Array2DRowRealMatrix) RealMatrix(org.apache.commons.math3.linear.RealMatrix) DataCell( BitVectorValue(


DataCell ( DataRow ( DataTableSpec ( DataColumnSpec ( DefaultRow ( ArrayList (java.util.ArrayList)141 StringCell ( DoubleCell ( DoubleValue ( InvalidSettingsException (org.knime.core.node.InvalidSettingsException)109 DataType ( RowKey ( BufferedDataTable (org.knime.core.node.BufferedDataTable)93 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)91 DataColumnSpecCreator ( LinkedHashMap (java.util.LinkedHashMap)81 IntCell ( HashMap (java.util.HashMap)60 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)57 ColumnRearranger (