Search in sources :

Example 46 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class AppendVariableToTableNodeModel method createColumnRearranger.

private ColumnRearranger createColumnRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    ColumnRearranger arranger = new ColumnRearranger(spec);
    Set<String> nameHash = new HashSet<String>();
    for (DataColumnSpec c : spec) {
        nameHash.add(c.getName());
    }
    List<Pair<String, FlowVariable.Type>> vars;
    if (m_settings.getIncludeAll()) {
        vars = getAllVariables();
    } else {
        vars = m_settings.getVariablesOfInterest();
    }
    if (vars.isEmpty()) {
        throw new InvalidSettingsException("No variables selected");
    }
    DataColumnSpec[] specs = new DataColumnSpec[vars.size()];
    final DataCell[] values = new DataCell[vars.size()];
    for (int i = 0; i < vars.size(); i++) {
        Pair<String, FlowVariable.Type> c = vars.get(i);
        String name = c.getFirst();
        DataType type;
        switch(c.getSecond()) {
            case DOUBLE:
                type = DoubleCell.TYPE;
                try {
                    double dValue = peekFlowVariableDouble(name);
                    values[i] = new DoubleCell(dValue);
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type double): " + name);
                }
                break;
            case INTEGER:
                type = IntCell.TYPE;
                try {
                    int iValue = peekFlowVariableInt(name);
                    values[i] = new IntCell(iValue);
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type int): " + name);
                }
                break;
            case STRING:
                type = StringCell.TYPE;
                try {
                    String sValue = peekFlowVariableString(name);
                    sValue = sValue == null ? "" : sValue;
                    values[i] = new StringCell(sValue);
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type String): " + name);
                }
                break;
            default:
                throw new InvalidSettingsException("Unsupported variable type: " + c.getSecond());
        }
        if (nameHash.contains(name) && !name.toLowerCase().endsWith("(variable)")) {
            name = name.concat(" (variable)");
        }
        String newName = name;
        int uniquifier = 1;
        while (!nameHash.add(newName)) {
            newName = name + " (#" + (uniquifier++) + ")";
        }
        specs[i] = new DataColumnSpecCreator(newName, type).createSpec();
    }
    arranger.append(new AbstractCellFactory(specs) {

        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            return values;
        }
    });
    return arranger;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataType(org.knime.core.data.DataType) HashSet(java.util.HashSet) Pair(org.knime.core.util.Pair) AbstractCellFactory(org.knime.core.data.container.AbstractCellFactory) PortType(org.knime.core.node.port.PortType) DataType(org.knime.core.data.DataType) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) NoSuchElementException(java.util.NoSuchElementException) FlowVariable(org.knime.core.node.workflow.FlowVariable)

Example 47 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class VariableFileReaderNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    Map<String, FlowVariable> stack = createStack(m_frSettings.getVariableName());
    VariableFileReaderNodeSettings settings = m_frSettings.createSettingsFrom(stack);
    LOGGER.info("Preparing to read from '" + m_frSettings.getDataFileLocation().toString() + "'.");
    // check again the settings - especially file existence (under Linux
    // files could be deleted/renamed since last config-call...
    SettingsStatus status = settings.getStatusOfSettings(true, null);
    if (status.getNumOfErrors() > 0) {
        throw new InvalidSettingsException(status.getAllErrorMessages(10));
    }
    DataTableSpec tSpec = settings.createDataTableSpec();
    FileTable fTable = new FileTable(tSpec, settings, settings.getSkippedColumns(), exec);
    // create a DataContainer and fill it with the rows read. It is faster
    // then reading the file every time (for each row iterator), and it
    // collects the domain for each column for us. Also, if things fail,
    // the error message is printed during file reader execution (were it
    // belongs to) and not some time later when a node uses the row
    // iterator from the file table.
    BufferedDataContainer c = exec.createDataContainer(fTable.getDataTableSpec(), /* initDomain= */
    true);
    int row = 0;
    FileRowIterator it = fTable.iterator();
    try {
        if (it.getZipEntryName() != null) {
            // seems we are reading a ZIP archive.
            LOGGER.info("Reading entry '" + it.getZipEntryName() + "' from the specified ZIP archive.");
        }
        while (it.hasNext()) {
            row++;
            DataRow next = it.next();
            String message = "Caching row #" + row + " (\"" + next.getKey() + "\")";
            exec.setMessage(message);
            exec.checkCanceled();
            c.addRowToTable(next);
        }
        if (it.zippedSourceHasMoreEntries()) {
            // after reading til the end of the file this returns a valid
            // result
            setWarningMessage("Source is a ZIP archive with multiple " + "entries. Only reading first entry!");
        }
    } catch (DuplicateKeyException dke) {
        String msg = dke.getMessage();
        if (msg == null) {
            msg = "Duplicate row IDs";
        }
        msg += ". Consider making IDs unique in the advanced settings.";
        DuplicateKeyException newDKE = new DuplicateKeyException(msg);
        newDKE.initCause(dke);
        throw newDKE;
    } finally {
        c.close();
    }
    // user settings allow for truncating the table
    if (it.iteratorEndedEarly()) {
        setWarningMessage("Data was truncated due to user settings.");
    }
    BufferedDataTable out = c.getTable();
    // closes all sources.
    fTable.dispose();
    return new BufferedDataTable[] { out };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) SettingsStatus(org.knime.core.util.tokenizer.SettingsStatus) DataRow(org.knime.core.data.DataRow) DuplicateKeyException(org.knime.core.util.DuplicateKeyException) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable) FlowVariable(org.knime.core.node.workflow.FlowVariable)

Example 48 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class DecTreePredictorNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
public PortObject[] execute(final PortObject[] inPorts, final ExecutionContext exec) throws CanceledExecutionException, Exception {
    exec.setMessage("Decision Tree Predictor: Loading predictor...");
    PMMLPortObject port = (PMMLPortObject) inPorts[INMODELPORT];
    List<Node> models = port.getPMMLValue().getModels(PMMLModelType.TreeModel);
    if (models.isEmpty()) {
        String msg = "Decision Tree evaluation failed: " + "No tree model found.";
        LOGGER.error(msg);
        throw new RuntimeException(msg);
    }
    PMMLDecisionTreeTranslator trans = new PMMLDecisionTreeTranslator();
    port.initializeModelTranslator(trans);
    DecisionTree decTree = trans.getDecisionTree();
    decTree.resetColorInformation();
    BufferedDataTable inData = (BufferedDataTable) inPorts[INDATAPORT];
    // get column with color information
    String colorColumn = null;
    for (DataColumnSpec s : inData.getDataTableSpec()) {
        if (s.getColorHandler() != null) {
            colorColumn = s.getName();
            break;
        }
    }
    decTree.setColorColumn(colorColumn);
    exec.setMessage("Decision Tree Predictor: start execution.");
    PortObjectSpec[] inSpecs = new PortObjectSpec[] { inPorts[0].getSpec(), inPorts[1].getSpec() };
    DataTableSpec outSpec = createOutTableSpec(inSpecs);
    BufferedDataContainer outData = exec.createDataContainer(outSpec);
    long coveredPattern = 0;
    long nrPattern = 0;
    long rowCount = 0;
    long numberRows = inData.size();
    exec.setMessage("Classifying...");
    for (DataRow thisRow : inData) {
        DataCell cl = null;
        LinkedHashMap<String, Double> classDistrib = null;
        try {
            Pair<DataCell, LinkedHashMap<DataCell, Double>> pair = decTree.getWinnerAndClasscounts(thisRow, inData.getDataTableSpec());
            cl = pair.getFirst();
            LinkedHashMap<DataCell, Double> classCounts = pair.getSecond();
            classDistrib = getDistribution(classCounts);
            if (coveredPattern < m_maxNumCoveredPattern.getIntValue()) {
                // remember this one for HiLite support
                decTree.addCoveredPattern(thisRow, inData.getDataTableSpec());
                coveredPattern++;
            } else {
                // too many patterns for HiLite - at least remember color
                decTree.addCoveredColor(thisRow, inData.getDataTableSpec());
            }
            nrPattern++;
        } catch (Exception e) {
            LOGGER.error("Decision Tree evaluation failed: " + e.getMessage());
            throw e;
        }
        if (cl == null) {
            LOGGER.error("Decision Tree evaluation failed: result empty");
            throw new Exception("Decision Tree evaluation failed.");
        }
        DataCell[] newCells = new DataCell[outSpec.getNumColumns()];
        int numInCells = thisRow.getNumCells();
        for (int i = 0; i < numInCells; i++) {
            newCells[i] = thisRow.getCell(i);
        }
        if (m_showDistribution.getBooleanValue()) {
            for (int i = numInCells; i < newCells.length - 1; i++) {
                String predClass = outSpec.getColumnSpec(i).getName();
                if (classDistrib != null && classDistrib.get(predClass) != null) {
                    newCells[i] = new DoubleCell(classDistrib.get(predClass));
                } else {
                    newCells[i] = new DoubleCell(0.0);
                }
            }
        }
        newCells[newCells.length - 1] = cl;
        outData.addRowToTable(new DefaultRow(thisRow.getKey(), newCells));
        rowCount++;
        if (rowCount % 100 == 0) {
            exec.setProgress(rowCount / (double) numberRows, "Classifying... Row " + rowCount + " of " + numberRows);
        }
        exec.checkCanceled();
    }
    if (coveredPattern < nrPattern) {
        // let the user know that we did not store all available pattern
        // for HiLiting.
        this.setWarningMessage("Tree only stored first " + m_maxNumCoveredPattern.getIntValue() + " (of " + nrPattern + ") rows for HiLiting!");
    }
    outData.close();
    m_decTree = decTree;
    exec.setMessage("Decision Tree Predictor: end execution.");
    return new BufferedDataTable[] { outData.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLDecisionTreeTranslator(org.knime.base.node.mine.decisiontree2.PMMLDecisionTreeTranslator) DoubleCell(org.knime.core.data.def.DoubleCell) Node(org.w3c.dom.Node) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) DecisionTree(org.knime.base.node.mine.decisiontree2.model.DecisionTree) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 49 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class RegressionPredictorNodeModel method configure.

/**
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    PMMLPortObjectSpec regModelSpec = (PMMLPortObjectSpec) inSpecs[0];
    DataTableSpec dataSpec = (DataTableSpec) inSpecs[1];
    if (dataSpec == null || regModelSpec == null) {
        throw new InvalidSettingsException("No input specification available");
    }
    if (regModelSpec.getTargetCols().get(0).getType().isCompatible(DoubleValue.class) && m_settings.getIncludeProbabilities()) {
        setWarningMessage("The option \"Append columns with predicted probabilities\"" + " has only an effect for nominal targets");
    }
    if (null != RegressionPredictorCellFactory.createColumnSpec(regModelSpec, dataSpec, m_settings)) {
        ColumnRearranger c = new ColumnRearranger(dataSpec);
        c.append(new RegressionPredictorCellFactory(regModelSpec, dataSpec, m_settings) {

            @Override
            public DataCell[] getCells(final DataRow row) {
                // not called during configure
                return null;
            }
        });
        DataTableSpec outSpec = c.createSpec();
        return new DataTableSpec[] { outSpec };
    } else {
        return null;
    }
}
Also used : PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) DataRow(org.knime.core.data.DataRow)

Example 50 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class BitVectorGeneratorNodeModel method calculateMeanValues.

private double[] calculateMeanValues(final DataTable input) {
    double[] meanValues = new double[input.getDataTableSpec().getNumColumns()];
    int nrOfRows = 0;
    for (DataRow row : input) {
        for (int i = 0; i < row.getNumCells(); i++) {
            if (row.getCell(i).isMissing() || !row.getCell(i).getType().isCompatible(DoubleValue.class)) {
                continue;
            }
            meanValues[i] += ((DoubleValue) row.getCell(i)).getDoubleValue();
        }
        nrOfRows++;
    }
    for (int i = 0; i < meanValues.length; i++) {
        meanValues[i] = meanValues[i] / nrOfRows;
    }
    return meanValues;
}
Also used : DataRow(org.knime.core.data.DataRow)

Aggregations

DataRow (org.knime.core.data.DataRow)482 DataCell (org.knime.core.data.DataCell)268 DataTableSpec (org.knime.core.data.DataTableSpec)159 BufferedDataTable (org.knime.core.node.BufferedDataTable)125 DataColumnSpec (org.knime.core.data.DataColumnSpec)109 RowKey (org.knime.core.data.RowKey)88 DefaultRow (org.knime.core.data.def.DefaultRow)88 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)80 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)76 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)73 DoubleValue (org.knime.core.data.DoubleValue)72 ArrayList (java.util.ArrayList)65 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)65 RowIterator (org.knime.core.data.RowIterator)62 DataType (org.knime.core.data.DataType)61 DoubleCell (org.knime.core.data.def.DoubleCell)57 StringCell (org.knime.core.data.def.StringCell)53 SingleCellFactory (org.knime.core.data.container.SingleCellFactory)48 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)44 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)43