Search in sources :

Example 81 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class ColumnListLoopStartNodeModel method configure.

/**
 * {@inheritDoc}
 *
 * Validates the configured column selection against the first input spec,
 * publishes the iteration counter as the flow variable
 * {@code currentIteration} and returns the spec created by the column
 * rearranger for that input spec.
 *
 * @param inSpecs the input table specs; only index 0 is read
 * @return a single-element array holding the spec created by the rearranger
 * @throws InvalidSettingsException if no column is selected while "iterate
 *             all columns" is disabled, or if a selected column is not
 *             contained in the input table
 */
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException {
    if ((m_settings.iterateOverColumns().size() < 1) && !m_settings.iterateAllColumns()) {
        throw new InvalidSettingsException("No columns to iterate over selected");
    }
    if (!m_settings.iterateAllColumns()) {
        for (String col : m_settings.iterateOverColumns()) {
            if (!inSpecs[0].containsName(col)) {
                // A selected column that no longer exists is a settings problem, so it is
                // reported with the checked exception this method declares instead of an
                // unchecked IllegalArgumentException.
                throw new InvalidSettingsException("Column '" + col + "' does not exist in input table");
            }
        }
    }
    // configure is expected to run with the iteration counter still at its start value
    assert m_iteration == 0;
    pushFlowVariableInt("currentIteration", m_iteration);
    ColumnRearranger crea = createRearranger(inSpecs[0]);
    return new DataTableSpec[] { crea.createSpec() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException)

Example 82 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class NaiveBayesPredictorNodeModel method configure.

/**
 * {@inheritDoc}
 *
 * Checks the model and data port specs, emits warnings for input columns
 * reported by {@code check4UnknownCols} and for attributes reported by
 * {@code check4MissingCols}, and builds the output spec from the input spec
 * and the result column spec.
 *
 * @return a single-element array with the output spec, or {@code null} if no
 *         result column spec could be created
 * @throws InvalidSettingsException if the model spec carries no table spec
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    // both ports have to provide a spec
    assert (inSpecs != null && inSpecs.length == 2 && inSpecs[DATA_IN_PORT] != null && inSpecs[MODEL_IN_PORT] != null);
    final PortObjectSpec modelPortSpec = inSpecs[MODEL_IN_PORT];
    if (!(modelPortSpec instanceof NaiveBayesPortObjectSpec)) {
        throw new IllegalArgumentException("Invalid input data");
    }
    final NaiveBayesPortObjectSpec bayesSpec = (NaiveBayesPortObjectSpec) modelPortSpec;
    final DataTableSpec trainingSpec = bayesSpec.getTableSpec();
    final DataColumnSpec classColumn = bayesSpec.getClassColumn();
    if (trainingSpec == null) {
        throw new InvalidSettingsException("No model spec available");
    }
    final PortObjectSpec dataPortSpec = inSpecs[DATA_IN_PORT];
    if (!(dataPortSpec instanceof DataTableSpec)) {
        throw new IllegalArgumentException("TableSpec must not be null");
    }
    final DataTableSpec spec = (DataTableSpec) dataPortSpec;

    // input columns the model does not know (wrong name or wrong type)
    final List<String> unknownCols = check4UnknownCols(trainingSpec, spec);
    final int unknownCount = unknownCols.size();
    if (unknownCount >= spec.getNumColumns()) {
        setWarningMessage("No known attribute columns found use class prior probability to predict the class membership");
    } else if (unknownCount == 1) {
        setWarningMessage("Input column " + unknownCols.get(0) + " is unknown and will be skipped.");
    } else if (unknownCount > 1) {
        final StringBuilder msg = new StringBuilder("The following input columns are unknown and will be skipped: ");
        // list at most the first four names, then abbreviate
        final int shown = Math.min(unknownCount, 4);
        for (int idx = 0; idx < shown; idx++) {
            if (idx > 0) {
                msg.append(", ");
            }
            msg.append(unknownCols.get(idx));
        }
        if (unknownCount > shown) {
            msg.append(", ").append("...");
        }
        setWarningMessage(msg.toString());
    }

    // attributes of the learned model that the input data does not provide
    final List<String> missingInputCols = check4MissingCols(trainingSpec, classColumn.getName(), spec);
    final int missingCount = missingInputCols.size();
    if (missingCount == 1) {
        setWarningMessage("Attribute " + missingInputCols.get(0) + " is missing in the input data");
    } else if (missingCount > 1) {
        final StringBuilder msg = new StringBuilder("The following attributes are missing in the input data: ");
        // list at most the first four names, then abbreviate
        final int shown = Math.min(missingCount, 4);
        for (int idx = 0; idx < shown; idx++) {
            if (idx > 0) {
                msg.append(", ");
            }
            msg.append(missingInputCols.get(idx));
        }
        if (missingCount > shown) {
            msg.append(", ").append("...");
        }
        setWarningMessage(msg.toString());
    }

    final DataColumnSpec resultColSpecs = NaiveBayesCellFactory.createResultColSpecs(classColumn, spec, m_inclProbVals.getBooleanValue());
    if (resultColSpecs == null) {
        return null;
    }
    return new PortObjectSpec[] { AppendedColumnTable.getTableSpec(spec, resultColSpecs) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) NaiveBayesPortObjectSpec(org.knime.base.node.mine.bayes.naivebayes.port.NaiveBayesPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec)

Example 83 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class NaiveBayesLearnerNodeModel method configure.

/**
 * {@inheritDoc}
 *
 * Validates the class column setting against the training data spec, warns
 * about nominal columns whose domain holds more values than the configured
 * maximum, and returns a model spec built from the table spec and the class
 * column spec.
 *
 * @throws InvalidSettingsException if the class column is not set or not
 *             part of the table, the table has fewer than two columns, the
 *             class column domain exceeds the maximum number of values, no
 *             nominal column exists, or no valid column remains
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    // the class column has to be configured
    final String classColumn = m_classifyColumnName.getStringValue();
    if (classColumn == null || classColumn.length() < 1) {
        throw new InvalidSettingsException("Please define the classification column");
    }
    final PortObjectSpec trainingPortSpec = inSpecs[TRAINING_DATA_PORT];
    if (!(trainingPortSpec instanceof DataTableSpec)) {
        throw new IllegalArgumentException("Invalid input data");
    }
    final DataTableSpec tableSpec = (DataTableSpec) trainingPortSpec;
    if (tableSpec.findColumnIndex(classColumn) < 0) {
        throw new InvalidSettingsException("Please define the classification column");
    }
    if (tableSpec.getNumColumns() < 2) {
        throw new InvalidSettingsException("Input table should contain at least 2 columns");
    }

    final int maxNoOfNominalVals = m_maxNoOfNominalVals.getIntValue();
    // Look for at least one nominal column and collect every nominal column
    // whose known domain holds more values than allowed.
    boolean nominalColFound = false;
    final List<String> oversizedCols = new ArrayList<>();
    for (final DataColumnSpec colSpec : tableSpec) {
        if (!colSpec.getType().isCompatible(NominalValue.class)) {
            continue;
        }
        nominalColFound = true;
        final DataColumnDomain domain = colSpec.getDomain();
        if (domain == null || domain.getValues() == null) {
            // no domain information available, nothing to compare
            continue;
        }
        final int valueCount = domain.getValues().size();
        if (valueCount <= maxNoOfNominalVals) {
            continue;
        }
        if (colSpec.getName().equals(classColumn)) {
            // the class column itself must not exceed the limit
            throw new InvalidSettingsException("Class column domain contains too many unique values (" + valueCount + ")");
        }
        oversizedCols.add(colSpec.getName() + " (" + valueCount + ")");
    }
    if (!nominalColFound) {
        throw new InvalidSettingsException("No possible class attribute found in input table");
    }

    final int oversizedCount = oversizedCols.size();
    if (oversizedCount == 1) {
        setWarningMessage("Column " + oversizedCols.get(0) + " will possibly be skipped.");
    } else if (oversizedCount > 1) {
        final StringBuilder msg = new StringBuilder("The following columns will possibly be skipped: ");
        // list at most the first four entries, then abbreviate
        final int shown = Math.min(oversizedCount, 4);
        for (int idx = 0; idx < shown; idx++) {
            if (idx > 0) {
                msg.append(", ");
            }
            msg.append(oversizedCols.get(idx));
        }
        if (oversizedCount > shown) {
            msg.append(", ").append("...");
        }
        setWarningMessage(msg.toString());
    }
    if (tableSpec.getNumColumns() - oversizedCols.size() < 1) {
        throw new InvalidSettingsException("Not enough valid columns");
    }
    final NaiveBayesPortObjectSpec modelSpec = new NaiveBayesPortObjectSpec(tableSpec, tableSpec.getColumnSpec(classColumn));
    return new PortObjectSpec[] { modelSpec };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) NaiveBayesPortObjectSpec(org.knime.base.node.mine.bayes.naivebayes.port.NaiveBayesPortObjectSpec) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) PortObjectSpec(org.knime.core.node.port.PortObjectSpec)

Example 84 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class DecTreePredictorNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * Loads the decision tree from the PMML model port, classifies every row of
 * the data port with it and writes the input cells, the optional class
 * distribution columns and the predicted class into a new table.
 *
 * @param inPorts the port objects; the PMML model is read from
 *            {@code INMODELPORT} and the data table from {@code INDATAPORT}
 * @param exec used for progress messages, cancellation checks and for
 *            creating the output container
 * @return a single-element array holding the table with the predictions
 * @throws CanceledExecutionException presumably raised by
 *             {@code exec.checkCanceled()} when the user cancels - confirm
 * @throws Exception if evaluating a row fails or yields no winner class; a
 *             {@link RuntimeException} is thrown if the PMML port contains
 *             no tree model
 */
@Override
public PortObject[] execute(final PortObject[] inPorts, final ExecutionContext exec) throws CanceledExecutionException, Exception {
    exec.setMessage("Decision Tree Predictor: Loading predictor...");
    PMMLPortObject port = (PMMLPortObject) inPorts[INMODELPORT];
    List<Node> models = port.getPMMLValue().getModels(PMMLModelType.TreeModel);
    // without a tree model in the PMML document there is nothing to predict with
    if (models.isEmpty()) {
        String msg = "Decision Tree evaluation failed: " + "No tree model found.";
        LOGGER.error(msg);
        throw new RuntimeException(msg);
    }
    // let the port object fill the translator, then take the tree from it
    PMMLDecisionTreeTranslator trans = new PMMLDecisionTreeTranslator();
    port.initializeModelTranslator(trans);
    DecisionTree decTree = trans.getDecisionTree();
    decTree.resetColorInformation();
    BufferedDataTable inData = (BufferedDataTable) inPorts[INDATAPORT];
    // get column with color information
    // (the first column that has a color handler; stays null if there is none)
    String colorColumn = null;
    for (DataColumnSpec s : inData.getDataTableSpec()) {
        if (s.getColorHandler() != null) {
            colorColumn = s.getName();
            break;
        }
    }
    decTree.setColorColumn(colorColumn);
    exec.setMessage("Decision Tree Predictor: start execution.");
    // NOTE(review): the specs are taken from the literal indices 0 and 1 while
    // createOutTableSpec reads them via INDATAPORT/INMODELPORT - assumes those
    // constants are 0 and 1; confirm.
    PortObjectSpec[] inSpecs = new PortObjectSpec[] { inPorts[0].getSpec(), inPorts[1].getSpec() };
    DataTableSpec outSpec = createOutTableSpec(inSpecs);
    BufferedDataContainer outData = exec.createDataContainer(outSpec);
    // coveredPattern: rows stored in the tree as covered patterns
    // nrPattern: rows evaluated successfully
    // rowCount: rows written to the output, used for progress reporting
    long coveredPattern = 0;
    long nrPattern = 0;
    long rowCount = 0;
    long numberRows = inData.size();
    exec.setMessage("Classifying...");
    for (DataRow thisRow : inData) {
        DataCell cl = null;
        LinkedHashMap<String, Double> classDistrib = null;
        try {
            // first: winner class of the row, second: the counts per class
            Pair<DataCell, LinkedHashMap<DataCell, Double>> pair = decTree.getWinnerAndClasscounts(thisRow, inData.getDataTableSpec());
            cl = pair.getFirst();
            LinkedHashMap<DataCell, Double> classCounts = pair.getSecond();
            classDistrib = getDistribution(classCounts);
            if (coveredPattern < m_maxNumCoveredPattern.getIntValue()) {
                // remember this one for HiLite support
                decTree.addCoveredPattern(thisRow, inData.getDataTableSpec());
                coveredPattern++;
            } else {
                // too many patterns for HiLite - at least remember color
                decTree.addCoveredColor(thisRow, inData.getDataTableSpec());
            }
            nrPattern++;
        } catch (Exception e) {
            // log the failure here and let the caller see the original exception
            LOGGER.error("Decision Tree evaluation failed: " + e.getMessage());
            throw e;
        }
        if (cl == null) {
            LOGGER.error("Decision Tree evaluation failed: result empty");
            throw new Exception("Decision Tree evaluation failed.");
        }
        // output row layout: input cells, optional distribution cells, prediction
        DataCell[] newCells = new DataCell[outSpec.getNumColumns()];
        int numInCells = thisRow.getNumCells();
        for (int i = 0; i < numInCells; i++) {
            newCells[i] = thisRow.getCell(i);
        }
        if (m_showDistribution.getBooleanValue()) {
            // the distribution value is looked up by the name of the output
            // column; classes without an entry get 0.0
            for (int i = numInCells; i < newCells.length - 1; i++) {
                String predClass = outSpec.getColumnSpec(i).getName();
                if (classDistrib != null && classDistrib.get(predClass) != null) {
                    newCells[i] = new DoubleCell(classDistrib.get(predClass));
                } else {
                    newCells[i] = new DoubleCell(0.0);
                }
            }
        }
        // the last column always holds the predicted class
        newCells[newCells.length - 1] = cl;
        outData.addRowToTable(new DefaultRow(thisRow.getKey(), newCells));
        rowCount++;
        // update the progress only every 100 rows, but check for cancellation on every row
        if (rowCount % 100 == 0) {
            exec.setProgress(rowCount / (double) numberRows, "Classifying... Row " + rowCount + " of " + numberRows);
        }
        exec.checkCanceled();
    }
    if (coveredPattern < nrPattern) {
        // let the user know that we did not store all available pattern
        // for HiLiting.
        this.setWarningMessage("Tree only stored first " + m_maxNumCoveredPattern.getIntValue() + " (of " + nrPattern + ") rows for HiLiting!");
    }
    outData.close();
    // keep the tree, including the covered patterns and colors added above, in the model
    m_decTree = decTree;
    exec.setMessage("Decision Tree Predictor: end execution.");
    return new BufferedDataTable[] { outData.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLDecisionTreeTranslator(org.knime.base.node.mine.decisiontree2.PMMLDecisionTreeTranslator) DoubleCell(org.knime.core.data.def.DoubleCell) Node(org.w3c.dom.Node) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) DecisionTree(org.knime.base.node.mine.decisiontree2.model.DecisionTree) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 85 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class DecTreePredictorNodeModel method createOutTableSpec.

/**
 * Creates the output table spec: the input data spec followed by one double
 * column per prediction value (only if the distribution is shown) and a
 * final string column for the prediction.
 *
 * @param inSpecs the port specs; the model spec is read from
 *            {@code INMODELPORT} and the data spec from {@code INDATAPORT}
 * @return the output spec, or {@code null} if the distribution is requested
 *         but the prediction values cannot be determined from the model spec
 */
private DataTableSpec createOutTableSpec(final PortObjectSpec[] inSpecs) {
    LinkedList<DataCell> predValues = null;
    if (m_showDistribution.getBooleanValue()) {
        predValues = getPredictionValues((PMMLPortObjectSpec) inSpecs[INMODELPORT]);
        if (predValues == null) {
            // no out spec can be determined
            return null;
        }
    }
    int numCols = (predValues == null ? 0 : predValues.size()) + 1;
    DataTableSpec inSpec = (DataTableSpec) inSpecs[INDATAPORT];
    UniqueNameGenerator nameGenerator = new UniqueNameGenerator(inSpec);
    DataColumnSpec[] newCols = new DataColumnSpec[numCols];
    // The distribution columns get the domain [0,1]. Setting a bar renderer as
    // the preferred renderer property is currently disabled.
    DataColumnDomain domain = new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain();
    // add all distribution columns; the linked list is traversed with its
    // iterator because indexed access would walk the list for every element
    if (predValues != null) {
        int colIdx = 0;
        for (DataCell predValue : predValues) {
            DataColumnSpecCreator colSpecCreator = nameGenerator.newCreator(predValue.toString(), DoubleCell.TYPE);
            colSpecCreator.setDomain(domain);
            newCols[colIdx++] = colSpecCreator.createSpec();
        }
    }
    // add the prediction column
    newCols[numCols - 1] = nameGenerator.newColumn("Prediction (DecTree)", StringCell.TYPE);
    DataTableSpec newColSpec = new DataTableSpec(newCols);
    return new DataTableSpec(inSpec, newColSpec);
}
Also used : PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) UniqueNameGenerator(org.knime.core.util.UniqueNameGenerator)

Aggregations

DataTableSpec (org.knime.core.data.DataTableSpec)938 DataColumnSpec (org.knime.core.data.DataColumnSpec)340 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)306 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)228 BufferedDataTable (org.knime.core.node.BufferedDataTable)226 DataCell (org.knime.core.data.DataCell)186 DataRow (org.knime.core.data.DataRow)170 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)136 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)129 DataType (org.knime.core.data.DataType)109 ArrayList (java.util.ArrayList)106 PortObjectSpec (org.knime.core.node.port.PortObjectSpec)98 DoubleValue (org.knime.core.data.DoubleValue)94 DefaultRow (org.knime.core.data.def.DefaultRow)92 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)90 ExecutionContext (org.knime.core.node.ExecutionContext)68 PortObject (org.knime.core.node.port.PortObject)66 PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec)62 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)61 RowKey (org.knime.core.data.RowKey)59