Search in sources :

Example 76 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class Statistics3Table method createStatisticsInColumnsTable.

/**
 * Builds the statistics table in transposed form: one output row per input column instead of one
 * output column per input column.
 *
 * <p>Only columns of {@code m_spec} whose type is compatible with {@link DoubleValue} contribute a
 * row. The row key is the column name and the cells come from {@code createRow(name, index)}, where
 * {@code index} is the position of the column within {@code m_spec} (counting skipped columns too).
 *
 * @param exec An {@link ExecutionContext} used to create the output container.
 * @return Statistics {@link BufferedDataTable} with skewness and kurtosis in a transposed form.
 * @since 2.9
 */
public BufferedDataTable createStatisticsInColumnsTable(final ExecutionContext exec) {
    final BufferedDataContainer statistics = exec.createDataContainer(getStatisticsSpecification());
    // Starts at -1 so that the increment at the top of the loop yields the 0-based column position.
    int position = -1;
    for (final DataColumnSpec column : m_spec) {
        position++;
        if (!column.getType().isCompatible(DoubleValue.class)) {
            // Non-numeric columns get no statistics row but still consume an index.
            continue;
        }
        final String columnName = column.getName();
        statistics.addRowToTable(new DefaultRow(columnName, createRow(columnName, position)));
    }
    statistics.close();
    return statistics.getTable();
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 77 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class VariableToTable2NodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Produces a table with exactly one row. Its key is the string value of {@code m_rowID} and it
 * holds one cell per entry returned by {@code getVariablesOfInterest()}. If peeking a variable
 * throws {@link NoSuchElementException}, or its type is not handled below, the cell is a missing cell.
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final DataTableSpec outSpec = createOutSpec();
    final BufferedDataContainer container = exec.createDataContainer(outSpec);
    final List<Pair<String, FlowVariable.Type>> variables = getVariablesOfInterest();
    final DataCell[] cells = new DataCell[variables.size()];
    // NOTE(review): the names collected here are never read again in this method, so variables that
    // vanished are only visible as missing cells - confirm whether a warning should be raised.
    final List<String> lostVariables = new ArrayList<String>();
    int cellIndex = 0;
    for (final Pair<String, FlowVariable.Type> variable : variables) {
        final String name = variable.getFirst();
        // Fallback that stays in place when the variable cannot be read or has an unhandled type.
        DataCell cell = DataType.getMissingCell();
        switch (variable.getSecond()) {
            case DOUBLE:
                try {
                    cell = new DoubleCell(peekFlowVariableDouble(name));
                } catch (NoSuchElementException e) {
                    lostVariables.add(name + " (Double)");
                }
                break;
            case INTEGER:
                try {
                    cell = new IntCell(peekFlowVariableInt(name));
                } catch (NoSuchElementException e) {
                    lostVariables.add(name + " (Integer)");
                }
                break;
            case STRING:
                try {
                    final String value = peekFlowVariableString(name);
                    // A null string value is written as an empty string rather than a missing cell.
                    if (value == null) {
                        cell = new StringCell("");
                    } else {
                        cell = new StringCell(value);
                    }
                } catch (NoSuchElementException e) {
                    lostVariables.add(name + " (String)");
                }
                break;
            default:
                break;
        }
        cells[cellIndex++] = cell;
    }
    container.addRowToTable(new DefaultRow(m_rowID.getStringValue(), cells));
    container.close();
    return new BufferedDataTable[] { container.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) IntCell(org.knime.core.data.def.IntCell) PortType(org.knime.core.node.port.PortType) DataType(org.knime.core.data.DataType) StringCell(org.knime.core.data.def.StringCell) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) NoSuchElementException(java.util.NoSuchElementException) Pair(org.knime.core.util.Pair) FlowVariable(org.knime.core.node.workflow.FlowVariable)

Example 78 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class JavaRowSplitterNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Wraps the first input table as a row input, creates one output container (two when the node
 * has two output ports) and delegates the actual work to
 * {@code execute(input, outputs, exec)}. The tables of the containers are returned in port order.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inTable = inData[0];
    final int rowCount = inTable.getRowCount();
    m_rowCount = rowCount;
    final DataTableRowInput input = new DataTableRowInput(inTable);
    final DataTableSpec spec = inTable.getDataTableSpec();
    final BufferedDataContainer trueMatch = exec.createDataContainer(spec);
    // The second container only exists when the node was set up with a second output port.
    final BufferedDataContainer falseMatch = getNrOutPorts() == 2 ? exec.createDataContainer(spec) : null;
    // Keep only the containers that were actually created, preserving port order.
    final BufferedDataContainer[] containers = Stream.of(trueMatch, falseMatch).filter(c -> c != null).toArray(BufferedDataContainer[]::new);
    final BufferedDataTableRowOutput[] outputs = new BufferedDataTableRowOutput[containers.length];
    for (int port = 0; port < containers.length; port++) {
        outputs[port] = new BufferedDataTableRowOutput(containers[port]);
    }
    execute(input, outputs, exec);
    final BufferedDataTable[] outTables = new BufferedDataTable[containers.length];
    for (int port = 0; port < containers.length; port++) {
        outTables[port] = containers[port].getTable();
    }
    return outTables;
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) DataTableSpec(org.knime.core.data.DataTableSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) JavaScriptingCustomizer(org.knime.ext.sun.nodes.script.settings.JavaScriptingCustomizer) BufferedDataTableRowOutput(org.knime.core.node.streamable.BufferedDataTableRowOutput) Expression(org.knime.ext.sun.nodes.script.expression.Expression) ExecutionContext(org.knime.core.node.ExecutionContext) DataTableRowInput(org.knime.core.node.streamable.DataTableRowInput) StreamableOperatorInternals(org.knime.core.node.streamable.StreamableOperatorInternals) BooleanValue(org.knime.core.data.BooleanValue) DataCell(org.knime.core.data.DataCell) PortInput(org.knime.core.node.streamable.PortInput) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) PartitionInfo(org.knime.core.node.streamable.PartitionInfo) RowInput(org.knime.core.node.streamable.RowInput) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) ColumnCalculator(org.knime.ext.sun.nodes.script.calculator.ColumnCalculator) IOException(java.io.IOException) OutputPortRole(org.knime.core.node.streamable.OutputPortRole) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) NodeModel(org.knime.core.node.NodeModel) File(java.io.File) DataRow(org.knime.core.data.DataRow) PortOutput(org.knime.core.node.streamable.PortOutput) NodeSettingsWO(org.knime.core.node.NodeSettingsWO) BufferedDataTable(org.knime.core.node.BufferedDataTable) Stream(java.util.stream.Stream) KnowsRowCountTable(org.knime.core.node.BufferedDataTable.KnowsRowCountTable) FlowVariableProvider(org.knime.ext.sun.nodes.script.calculator.FlowVariableProvider) InputPortRole(org.knime.core.node.streamable.InputPortRole) SimpleStreamableOperatorInternals(org.knime.core.node.streamable.simple.SimpleStreamableOperatorInternals) 
CheckUtils(org.knime.core.node.util.CheckUtils) MergeOperator(org.knime.core.node.streamable.MergeOperator) RowOutput(org.knime.core.node.streamable.RowOutput) StreamableOperator(org.knime.core.node.streamable.StreamableOperator) JavaScriptingSettings(org.knime.ext.sun.nodes.script.settings.JavaScriptingSettings) DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataTableRowInput(org.knime.core.node.streamable.DataTableRowInput) BufferedDataTable(org.knime.core.node.BufferedDataTable) BufferedDataTableRowOutput(org.knime.core.node.streamable.BufferedDataTableRowOutput)

Example 79 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class HiliteScorerNodeModel method execute.

/**
 * Starts the scoring in the scorer: compares the cells of the two configured columns row by row
 * and builds a confusion matrix from them.
 *
 * <p>Rows in which either compared cell is missing are skipped for counting, but they are still
 * included in the total row number. Besides returning the matrix, this method updates the member
 * fields {@code m_correctCount}, {@code m_falseCount}, {@code m_scorerCount}, {@code m_nrRows},
 * {@code m_values} and {@code m_keyStore}.
 *
 * <p>In the returned table, row {@code i} belongs to the i-th value as found in the first compare
 * column and column {@code j} to the j-th value as found in the second compare column; all cells
 * are of type {@code IntCell.TYPE}.
 *
 * @param data
 *            the input data of length one
 * @param exec
 *            the execution monitor
 * @return the confusion matrix
 * @throws CanceledExecutionException
 *             if user canceled execution; {@code reset()} is called before the exception is
 *             rethrown
 *
 * @see NodeModel#execute(BufferedDataTable[],ExecutionContext)
 */
// "unchecked" is needed because keyStore is created as a raw List[][] (generic arrays cannot be created).
@SuppressWarnings("unchecked")
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws CanceledExecutionException {
    // check input data
    assert (data != null && data.length == 1 && data[INPORT] != null);
    // blow away result from last execute (should have been reset anyway)
    // first try to figure out what are the different class values
    // in the two respective columns
    BufferedDataTable in = data[INPORT];
    DataTableSpec inSpec = in.getDataTableSpec();
    final int index1 = inSpec.findColumnIndex(m_firstCompareColumn);
    final int index2 = inSpec.findColumnIndex(m_secondCompareColumn);
    // the values of both columns in one array, used for the rows as well
    // as for the columns of the matrix;
    // the array is ordered already, i.e. if both columns have
    // cells in common (e.g. both have Iris-Setosa), they get the same
    // index in the array. thus, the high numbers should appear
    // in the diagonal
    // (first half of the overall progress is reserved for this scan)
    DataCell[] values = determineColValues(in, index1, index2, exec.createSubProgress(0.5));
    // list view on the array, used for the indexOf lookups in the row loop
    List<DataCell> valuesList = Arrays.asList(values);
    // all cells seen in the second column (missing cells included); used
    // further down to decide which column name is appended to ambiguous values
    Set<DataCell> valuesInCol2 = new HashSet<DataCell>();
    m_correctCount = 0;
    m_falseCount = 0;
    // the key store remembers the row key for later hiliting
    List[][] keyStore = new List[values.length][values.length];
    // the scorerCount counts the confusions
    m_scorerCount = new int[values.length][values.length];
    // init the matrix
    for (int i = 0; i < keyStore.length; i++) {
        for (int j = 0; j < keyStore[i].length; j++) {
            keyStore[i][j] = new ArrayList<RowKey>();
        }
    }
    int rowCnt = in.getRowCount();
    int rowNr = 0;
    // second half of the overall progress: the scoring loop itself
    ExecutionMonitor subExec = exec.createSubProgress(0.5);
    for (Iterator<DataRow> it = in.iterator(); it.hasNext(); rowNr++) {
        DataRow row = it.next();
        subExec.setProgress((1.0 + rowNr) / rowCnt, "Computing score, row " + rowNr + " (\"" + row.getKey() + "\") of " + in.getRowCount());
        try {
            subExec.checkCanceled();
        } catch (CanceledExecutionException cee) {
            // discard the partially computed state before propagating the cancel
            reset();
            throw cee;
        }
        DataCell cell1 = row.getCell(index1);
        DataCell cell2 = row.getCell(index2);
        // recorded before the missing check, so missing cells end up in this set too
        valuesInCol2.add(cell2);
        if (cell1.isMissing() || cell2.isMissing()) {
            // not scored; the row still counts towards rowNr (and thus m_nrRows)
            continue;
        }
        boolean areEqual = cell1.equals(cell2);
        // i1 is the row index (first column value), i2 the column index (second column value)
        int i1 = valuesList.indexOf(cell1);
        int i2 = areEqual ? i1 : valuesList.indexOf(cell2);
        assert i1 >= 0 : "column spec lacks possible value " + cell1;
        assert i2 >= 0 : "column spec lacks possible value " + cell2;
        // i2 must be equal to i1 if cells are equal (implication)
        assert (!areEqual || i1 == valuesList.indexOf(cell2));
        keyStore[i1][i2].add(row.getKey());
        m_scorerCount[i1][i2]++;
        if (areEqual) {
            m_correctCount++;
        } else {
            m_falseCount++;
        }
    }
    m_nrRows = rowNr;
    // Find string representations that are shared by more than one value:
    // first collect every distinct string, then remove one per value. A
    // failing remove means that the string was already consumed by an
    // earlier value, i.e. it occurs at least twice.
    HashSet<String> valuesAsStringSet = new HashSet<String>();
    HashSet<String> duplicateValuesAsString = new HashSet<String>();
    for (DataCell c : values) {
        valuesAsStringSet.add(c.toString());
    }
    for (DataCell c : values) {
        String cAsString = c.toString();
        if (!valuesAsStringSet.remove(cAsString)) {
            duplicateValuesAsString.add(cAsString);
        }
    }
    // valuesAsStringSet is empty again at this point (every distinct string
    // was removed once); below it is reused as the set of names already
    // handed out, so that every entry of m_values is unique
    boolean hasPrintedWarningOnAmbiguousValues = false;
    m_values = new String[values.length];
    for (int i = 0; i < m_values.length; i++) {
        DataCell c = values[i];
        String s = c.toString();
        if (duplicateValuesAsString.contains(s)) {
            // ambiguous string: append the name of the column the value was seen in;
            // a value seen in the second column always gets the second column's name
            boolean isInSecondColumn = valuesInCol2.contains(c);
            int uniquifier = 1;
            if (isInSecondColumn) {
                s = s.concat(" (" + m_secondCompareColumn + ")");
            } else {
                s = s.concat(" (" + m_firstCompareColumn + ")");
            }
            String newName = s;
            // still taken: append "#1", "#2", ... until the name is new
            while (!valuesAsStringSet.add(newName)) {
                newName = s + "#" + (uniquifier++);
            }
            m_values[i] = newName;
            // warn only once, for the first ambiguous value encountered
            if (!hasPrintedWarningOnAmbiguousValues) {
                hasPrintedWarningOnAmbiguousValues = true;
                setWarningMessage("Ambiguous value \"" + c.toString() + "\" encountered. Preserving individual instances;" + " consider to convert input columns to string");
            }
        } else {
            // unambiguous string; the loop still guards against a clash with
            // a name that was already handed out above
            int uniquifier = 1;
            String newName = s;
            while (!valuesAsStringSet.add(newName)) {
                newName = s + "#" + (uniquifier++);
            }
            m_values[i] = newName;
        }
    }
    // output spec: one int column per value, named like the value
    DataType[] colTypes = new DataType[m_values.length];
    Arrays.fill(colTypes, IntCell.TYPE);
    BufferedDataContainer container = exec.createDataContainer(new DataTableSpec(m_values, colTypes));
    for (int i = 0; i < m_values.length; i++) {
        // the value name serves as row key, the counts of that row as cells
        container.addRowToTable(new DefaultRow(m_values[i], m_scorerCount[i]));
    }
    container.close();
    // print info
    int correct = getCorrectCount();
    int incorrect = getFalseCount();
    double error = getError();
    int nrRows = getNrRows();
    // rows that were neither counted as correct nor as false
    int missing = nrRows - correct - incorrect;
    m_keyStore = keyStore;
    LOGGER.info("error=" + error + ", #correct=" + correct + ", #false=" + incorrect + ", #rows=" + nrRows + ", #missing=" + missing);
    // our view displays the table - we must keep a reference in the model.
    BufferedDataTable result = container.getTable();
    return new BufferedDataTable[] { result };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) DataRow(org.knime.core.data.DataRow) Point(java.awt.Point) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) ArrayList(java.util.ArrayList) List(java.util.List) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 80 with BufferedDataContainer

use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

the class MissingValueHandlingTable method createMissingValueHandlingTable.

// getColSetting(DataTableSpec, ColSetting[])
/**
 * Runs missing value handling on the given table according to the supplied column settings and
 * copies the result row by row into a new buffered table, reporting progress on the way.
 *
 * <p>If resolving the settings fails with an {@link InvalidSettingsException}, the problem is
 * logged and every column falls back to {@code ColSetting.METHOD_NO_HANDLING}. When at least one
 * column uses the most-frequent, max, min or mean method and the input is not already a
 * {@link StatisticsTable}, a statistics table is computed first; it takes one half of the progress
 * and the row copy takes the other half.
 *
 * @param table the table to do missing value handling on
 * @param colSettings the settings
 * @param exec for progress/cancel and to create the buffered data table
 * @param warningBuffer To which potential warning messages are added.
 * @return a cache table, cleaned up
 * @throws CanceledExecutionException if canceled
 */
public static BufferedDataTable createMissingValueHandlingTable(final DataTable table, final ColSetting[] colSettings, final ExecutionContext exec, final StringBuffer warningBuffer) throws CanceledExecutionException {
    ColSetting[] effectiveSettings;
    try {
        effectiveSettings = getColSetting(table.getDataTableSpec(), colSettings, false);
    } catch (InvalidSettingsException ise) {
        LOGGER.coding("getColSetting method is not supposed to throw " + "an exception, ignoring settings", ise);
        // Fall back to "no handling" for every column of the input table.
        final DataTableSpec tableSpec = table.getDataTableSpec();
        final int columnCount = tableSpec.getNumColumns();
        effectiveSettings = new ColSetting[columnCount];
        for (int col = 0; col < columnCount; col++) {
            final ColSetting noHandling = new ColSetting(tableSpec.getColumnSpec(col));
            noHandling.setMethod(ColSetting.METHOD_NO_HANDLING);
            effectiveSettings[col] = noHandling;
        }
    }

    // First pass: find out whether statistics are needed and how many columns ask for the most
    // frequent value.
    boolean statisticsRequired = false;
    int mostFrequentCount = 0;
    for (final ColSetting setting : effectiveSettings) {
        switch (setting.getMethod()) {
            case ColSetting.METHOD_MOST_FREQUENT:
                mostFrequentCount++;
                statisticsRequired = true;
                break;
            case ColSetting.METHOD_MAX:
            case ColSetting.METHOD_MIN:
            case ColSetting.METHOD_MEAN:
                statisticsRequired = true;
                break;
            default:
                break;
        }
    }

    // Second pass: remember the indices of the "most frequent" columns.
    final int[] mostFrequentCols = new int[mostFrequentCount];
    if (mostFrequentCount > 0) {
        int nextSlot = 0;
        for (int col = 0; col < effectiveSettings.length; col++) {
            switch (effectiveSettings[col].getMethod()) {
                case ColSetting.METHOD_MOST_FREQUENT:
                    mostFrequentCols[nextSlot++] = col;
                    break;
                default:
                    break;
            }
        }
    }

    final DataTable source;
    final ExecutionMonitor rowProgress;
    if (!statisticsRequired || table instanceof StatisticsTable) {
        // Either no statistics are needed or the input already provides them.
        source = table;
        rowProgress = exec;
    } else {
        // for creating statistics table
        final ExecutionMonitor statisticsProgress = exec.createSubProgress(0.5);
        final MyStatisticsTable statistics = new MyStatisticsTable(table, statisticsProgress, mostFrequentCols);
        if (statistics.m_warningMessage != null) {
            warningBuffer.append(statistics.m_warningMessage);
        }
        source = statistics;
        // for the iterator
        rowProgress = exec.createSubProgress(0.5);
    }

    final MissingValueHandlingTable handlingTable = new MissingValueHandlingTable(source, effectiveSettings);
    final BufferedDataContainer container = exec.createDataContainer(handlingTable.getDataTableSpec());
    rowProgress.setMessage("Adding rows...");
    try {
        final MissingValueHandlingTableIterator rows = new MissingValueHandlingTableIterator(handlingTable, rowProgress);
        for (int rowNumber = 1; rows.hasNext(); rowNumber++) {
            final DataRow row = rows.next();
            rowProgress.setMessage("Adding row " + rowNumber + " (\"" + row.getKey() + "\")");
            container.addRowToTable(row);
        }
    } catch (MissingValueHandlingTableIterator.RuntimeCanceledExecutionException rcee) {
        // The iterator wraps the cancellation in a runtime exception; surface the original cause.
        throw rcee.getCause();
    } finally {
        // Close in every case so the container does not stay open after a cancel or failure.
        container.close();
    }
    return container.getTable();
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) StatisticsTable(org.knime.base.data.statistics.StatisticsTable) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Aggregations

BufferedDataContainer (org.knime.core.node.BufferedDataContainer)157 BufferedDataTable (org.knime.core.node.BufferedDataTable)96 DefaultRow (org.knime.core.data.def.DefaultRow)93 DataCell (org.knime.core.data.DataCell)88 DataTableSpec (org.knime.core.data.DataTableSpec)88 DataRow (org.knime.core.data.DataRow)80 RowKey (org.knime.core.data.RowKey)38 DoubleCell (org.knime.core.data.def.DoubleCell)37 StringCell (org.knime.core.data.def.StringCell)26 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)24 ArrayList (java.util.ArrayList)23 DataColumnSpec (org.knime.core.data.DataColumnSpec)21 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)21 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)17 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)16 IOException (java.io.IOException)15 ExecutionContext (org.knime.core.node.ExecutionContext)15 LinkedHashMap (java.util.LinkedHashMap)14 HashSet (java.util.HashSet)13 IntCell (org.knime.core.data.def.IntCell)13