Search in sources:

Example 6 with CloseableRowIterator

use of org.knime.core.data.container.CloseableRowIterator in project knime-core by knime.

the class BootstrapNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Init random object
    long seed = m_configuration.getUseSeed() ? m_configuration.getSeed() : System.currentTimeMillis();
    Random random = new Random(seed);
    // Create containers for output tables
    BufferedDataContainer bootstrap = exec.createDataContainer(getSamplesSpec(inData[0].getDataTableSpec()));
    BufferedDataContainer holdout = exec.createDataContainer(inData[0].getDataTableSpec());
    // Create iterator for all rows
    CloseableRowIterator iterator = inData[0].iterator();
    int numberOfRows = inData[0].getRowCount();
    // Initialize the unprocessed-row counter with the total number of rows
    int unprocessedRows = numberOfRows;
    // Create a progress object covering the total number of rows
    Progress progress = new Progress(numberOfRows, exec);
    // Calculate number of samples
    int numberOfSamples;
    if (m_configuration.getInPercent()) {
        numberOfSamples = Math.round(numberOfRows * (m_configuration.getPercent() / 100));
    } else {
        numberOfSamples = m_configuration.getSize();
    }
    // Loop until every row has been processed
    while (unprocessedRows > 0) {
        int chunkSize;
        int numberOfChunkSamples;
        // The last chunk may be smaller than the others
        // and will take care of fixing rounding issues
        if (unprocessedRows > MAX_CHUNK_SIZE) {
            // Set to biggest allowed size
            chunkSize = MAX_CHUNK_SIZE;
            // Calculate amount of samples relative to the size of this chunk
            numberOfChunkSamples = Math.round((chunkSize / (float) numberOfRows) * numberOfSamples);
        } else {
            // Make this chunk as big as there are rows left
            chunkSize = unprocessedRows;
            // Generate the rest of the samples
            // (this will take care of rounding errors that may occur in the relative calculation)
            // we never put more than 2^31 rows in the bootstrap container, therefore it's safe to cast to int
            numberOfChunkSamples = numberOfSamples - (int) bootstrap.size();
        }
        // Sample this chunk
        sampleChunk(iterator, chunkSize, numberOfChunkSamples, bootstrap, holdout, random, progress);
        // Mark chunked rows as processed
        unprocessedRows -= chunkSize;
    }
    iterator.close();
    bootstrap.close();
    holdout.close();
    return new BufferedDataTable[] { bootstrap.getTable(), holdout.getTable() };
}
Also used: Random(java.util.Random) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator)
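Every example in this list closes the iterator manually, which leaks the underlying stream if an exception fires between iterator() and close(). A minimal sketch of a safer variant using try-with-resources, assuming CloseableRowIterator implements AutoCloseable (true in recent KNIME versions); countRows is a hypothetical helper, not part of the node above:

import org.knime.core.data.container.CloseableRowIterator;
import org.knime.core.node.BufferedDataTable;

static long countRows(final BufferedDataTable table) {
    long count = 0;
    // the try-with-resources block closes the iterator even if the loop throws
    try (CloseableRowIterator it = table.iterator()) {
        while (it.hasNext()) {
            it.next();
            count++;
        }
    }
    return count;
}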

Example 7 with CloseableRowIterator

use of org.knime.core.data.container.CloseableRowIterator in project knime-core by knime.

the class HiliteFilterNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    DataTableSpec inSpec = inData[0].getDataTableSpec();
    BufferedDataContainer bufIn = exec.createDataContainer(inSpec);
    BufferedDataContainer bufOut = exec.createDataContainer(inSpec);
    synchronized (m_inHdl) {
        double rowCnt = inData[0].size();
        CloseableRowIterator it = inData[0].iterator();
        for (long i = 0; i < rowCnt; i++) {
            DataRow row = it.next();
            if (m_inHdl.isHiLit(row.getKey())) {
                bufIn.addRowToTable(row);
            } else {
                bufOut.addRowToTable(row);
            }
            exec.checkCanceled();
            exec.setProgress((i + 1) / rowCnt);
        }
        // close the iterator so the underlying resources are released
        it.close();
    }
    bufIn.close();
    bufOut.close();
    m_inHdl.addHiLiteListener(this);
    return new BufferedDataTable[] { bufIn.getTable(), bufOut.getTable() };
}
Also used: DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) DataRow(org.knime.core.data.DataRow)
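Because exec.checkCanceled() throws CanceledExecutionException when the user cancels, the iterator above would otherwise stay open in that case. A hedged generalization of the same split-into-two-containers pattern with a try/finally guard; splitByPredicate and its parameter names are illustrative, not part of the KNIME API:

import java.util.function.Predicate;

import org.knime.core.data.DataRow;
import org.knime.core.data.container.CloseableRowIterator;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionContext;

static void splitByPredicate(final BufferedDataTable in, final BufferedDataContainer match,
        final BufferedDataContainer rest, final Predicate<DataRow> test,
        final ExecutionContext exec) throws CanceledExecutionException {
    final double rowCnt = in.size();
    long i = 0;
    CloseableRowIterator it = in.iterator();
    try {
        while (it.hasNext()) {
            DataRow row = it.next();
            // route each row into exactly one of the two containers
            (test.test(row) ? match : rest).addRowToTable(row);
            exec.checkCanceled();
            exec.setProgress(++i / rowCnt);
        }
    } finally {
        // runs even when checkCanceled() throws
        it.close();
    }
}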

Example 8 with CloseableRowIterator

use of org.knime.core.data.container.CloseableRowIterator in project knime-core by knime.

the class FixedWidthFRTable method iterator.

/**
 * {@inheritDoc}
 */
@Override
public CloseableRowIterator iterator() {
    try {
        synchronized (m_iterators) {
            CloseableRowIterator i = createRowIterator(m_nodeSettings, m_tableSpec, m_exec);
            m_iterators.add(new WeakReference<CloseableRowIterator>(i));
            return i;
        }
    } catch (IOException ioe) {
        LOGGER.error("I/O Error occurred while trying to open a stream" + " to '" + m_nodeSettings.getFileLocation().toString() + "'.");
    }
    return null;
}
Also used: CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) IOException(java.io.IOException)
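The WeakReference list above lets the table keep track of every iterator it hands out without preventing garbage collection. A minimal sketch of the matching cleanup hook, closing all iterators that are still reachable; dispose() is an assumed method name modeled on the example, not the actual FixedWidthFRTable API:

import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.List;

import org.knime.core.data.container.CloseableRowIterator;

private final List<WeakReference<CloseableRowIterator>> m_iterators =
        new ArrayList<WeakReference<CloseableRowIterator>>();

public void dispose() {
    synchronized (m_iterators) {
        for (WeakReference<CloseableRowIterator> ref : m_iterators) {
            CloseableRowIterator it = ref.get();
            // iterators that were already garbage-collected yield null here
            if (it != null) {
                it.close();
            }
        }
        m_iterators.clear();
    }
}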

Example 9 with CloseableRowIterator

use of org.knime.core.data.container.CloseableRowIterator in project GenericKnimeNodes by genericworkflownodes.

the class BeanShellNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataContainer container1 = null;
    Interpreter ip = new Interpreter();
    ip.eval(script_init);
    CloseableRowIterator iter = null;
    if (!script_firstPass.equals("")) {
        iter = inData[0].iterator();
        while (iter.hasNext()) {
            ip.set("INROW", fillInRow(iter.next()));
            ip.eval(script_firstPass);
        }
        iter.close();
    }
    iter = inData[0].iterator();
    int idx = 1;
    boolean first = true;
    while (iter.hasNext()) {
        ip.set("OUTROW", new OutRow());
        ip.set("INROW", fillInRow(iter.next()));
        ip.eval(script_secondPass);
        OutRow out = (OutRow) ip.get("OUTROW");
        if (out.isNull()) {
            continue;
        }
        if (first) {
            container1 = exec.createDataContainer(getDataTableSpec2(out));
            first = false;
        }
        List<Object> values = out.getValues();
        int N = values.size();
        int i = 0;
        DataCell[] cells = new DataCell[N];
        for (Object value : values) {
            cells[i++] = getCell(value);
        }
        DefaultRow row = new DefaultRow("Row " + idx++, cells);
        container1.addRowToTable(row);
    }
    iter.close();
    // container1 is null when no second-pass row produced output; see the guard sketched below
    container1.close();
    BufferedDataTable out1 = container1.getTable();
    return new BufferedDataTable[] { out1 };
}
Also used: Interpreter(bsh.Interpreter) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)
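As noted in the code, container1 is created lazily on the first non-null OUTROW; if the second pass never emits a row, container1.close() throws a NullPointerException. A defensive sketch of the method's tail, assuming an empty DataTableSpec is an acceptable fallback (that fallback is our assumption, not the node's documented behavior):

iter.close();
if (container1 == null) {
    // no second-pass row produced output; return an empty table instead of failing
    container1 = exec.createDataContainer(new DataTableSpec());
}
container1.close();
BufferedDataTable out1 = container1.getTable();
return new BufferedDataTable[] { out1 };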

Example 10 with CloseableRowIterator

use of org.knime.core.data.container.CloseableRowIterator in project knime-core by knime.

the class Joiner method performJoin.

/**
 * This method starts by reading the partitions of the left table defined
 * in currParts. If memory is low, partitions will be skipped or the
 * number of partitions will be raised, which leads to smaller partitions.
 * Successfully read partitions are joined immediately. The returned
 * collection contains the successfully processed partitions.
 *
 * @param leftTable The left input table (hashed in memory as the inner table).
 * @param rightTable The right input table.
 * @param outputContainer The container used for storing matches.
 * @param pendingParts The parts that are not processed yet.
 * @param exec The execution context.
 * @param progressDiff The difference in the progress monitor.
 * @return The partitions that were successfully processed (read + joined).
 * @throws CanceledExecutionException when execution is canceled
 */
private Collection<Integer> performJoin(final BufferedDataTable leftTable, final BufferedDataTable rightTable, final JoinContainer outputContainer, final Collection<Integer> pendingParts, final ExecutionContext exec, final double progressDiff) throws CanceledExecutionException {
    // Update increment for reporting progress
    double progress = exec.getProgressMonitor().getProgress();
    double numRows = leftTable.size() + rightTable.size();
    double inc = (progressDiff - progress) / numRows;
    Collection<Integer> currParts = new ArrayList<Integer>();
    currParts.addAll(pendingParts);
    setMessage("Read", exec, pendingParts, currParts);
    // Partition left table
    Map<Integer, Map<JoinTuple, Set<Integer>>> leftTableHashed = new HashMap<Integer, Map<JoinTuple, Set<Integer>>>();
    // This is only used when m_retainLeft is true and m_matchAny is false.
    // It holds the row indices of the left table that do not match
    // any row of the right table.
    Map<Integer, Set<Integer>> leftOuterJoins = new HashMap<Integer, Set<Integer>>();
    MemoryActionIndicator memIndicator = MemoryAlertSystem.getInstance().newIndicator();
    int counter = 0;
    long rowsAdded = 0;
    CloseableRowIterator leftIter = leftTable.iterator();
    while (leftIter.hasNext()) {
        exec.checkCanceled();
        boolean safeToAddMoreRows = !memIndicator.lowMemoryActionRequired() && ((m_rowsAddedBeforeForcedOOM == 0) || (rowsAdded % m_rowsAddedBeforeForcedOOM != (m_rowsAddedBeforeForcedOOM - 1)));
        if (safeToAddMoreRows) {
            DataRow row = leftIter.next();
            InputRow inputDataRow = new InputRow(row, counter, InputRow.Settings.InDataPort.Left, m_inputDataRowSettings);
            for (JoinTuple tuple : inputDataRow.getJoinTuples()) {
                int partition = tuple.hashCode() & m_bitMask;
                if (currParts.contains(partition)) {
                    addRow(leftTableHashed, leftOuterJoins, partition, tuple, inputDataRow);
                    rowsAdded++;
                }
            }
            counter++;
            // report progress
            progress += inc;
            exec.getProgressMonitor().setProgress(progress);
        } else {
            rowsAdded++;
            // Build list of partitions that are not empty
            List<Integer> nonEmptyPartitions = new ArrayList<Integer>();
            for (Integer i : currParts) {
                if (null != leftTableHashed.get(i)) {
                    nonEmptyPartitions.add(i);
                }
            }
            int numNonEmpty = nonEmptyPartitions.size();
            if (numNonEmpty > 1) {
                // remove input partitions to free memory
                List<Integer> removeParts = new ArrayList<Integer>();
                for (int i = 0; i < numNonEmpty / 2; i++) {
                    removeParts.add(nonEmptyPartitions.get(i));
                }
                // remove the collected data of the partitions that are no longer processed
                for (int i : removeParts) {
                    leftTableHashed.remove(i);
                    if (m_retainLeft && !m_matchAny) {
                        leftOuterJoins.remove(i);
                    }
                }
                currParts.removeAll(removeParts);
                LOGGER.debug("Skip partitions while " + "reading inner table. Currently Processed: " + currParts + ". Skip: " + removeParts);
                // update increment for reporting progress
                numRows += leftTable.size() + rightTable.size();
                inc = (progressDiff - progress) / numRows;
                setMessage("Read", exec, pendingParts, currParts);
            } else if (nonEmptyPartitions.size() == 1) {
                if (m_numBits < m_numBitsMaximal) {
                    LOGGER.debug("Increase number of partitions while " + "reading inner table. Currently " + "Processed: " + nonEmptyPartitions);
                    // increase number of partitions
                    m_numBits = m_numBits + 1;
                    m_bitMask = m_bitMask | (0x0001 << (m_numBits - 1));
                    Set<Integer> pending = new TreeSet<Integer>();
                    pending.addAll(pendingParts);
                    pendingParts.clear();
                    for (int i : pending) {
                        pendingParts.add(i);
                        int ii = i | (0x0001 << (m_numBits - 1));
                        pendingParts.add(ii);
                    }
                    int currPart = nonEmptyPartitions.iterator().next();
                    currParts.clear();
                    currParts.add(currPart);
                    // keep only the hashed rows that still belong to the current partition
                    retainPartitions(leftTableHashed, leftOuterJoins, currPart);
                    // update increment for reporting progress
                    numRows += leftTable.size() + rightTable.size();
                    inc = (progressDiff - progress) / numRows;
                    setMessage("Read", exec, pendingParts, currParts);
                } else {
                    // The number of partitions is already maximal (2^32).
                    // We can only keep going and hope that other nodes
                    // free some memory.
                    LOGGER.warn("Memory is low and no more memory can be freed here. This may cause an endless loop.");
                }
            } else if (nonEmptyPartitions.size() < 1) {
                // We have only empty partitions. Other nodes consume too
                // much memory; we cannot free any more here.
                LOGGER.warn("Memory is low and no more memory can be freed here. This may cause an endless loop.");
            }
        }
    }
    setMessage("Join", exec, pendingParts, currParts);
    // Join with outer table
    joinInMemory(leftTableHashed, leftOuterJoins, currParts, rightTable, outputContainer, exec, inc);
    // Log which parts were successfully joined
    for (int part : currParts) {
        int numTuples = leftTableHashed.get(part) != null ? leftTableHashed.get(part).values().size() : 0;
        LOGGER.debug("Joined " + part + " with " + numTuples + " tuples.");
    }
    // Clear explicitly; the garbage collector has trouble reclaiming these maps otherwise.
    leftTableHashed.clear();
    leftOuterJoins.clear();
    // return successfully joined parts
    return currParts;
}
Also used: TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) DataRow(org.knime.core.data.DataRow) MemoryActionIndicator(org.knime.core.data.util.memory.MemoryAlertSystem.MemoryActionIndicator) HashMap(java.util.HashMap) Map(java.util.Map)
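The heart of this example is the bit-mask partitioning: a join tuple lands in partition hashCode() & m_bitMask, and when memory runs low one extra mask bit splits every pending partition in two. A self-contained sketch of just that mechanism; PartitionMask and its members are illustrative names, not the Joiner's actual fields:

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

final class PartitionMask {
    private int numBits = 0;
    private int bitMask = 0;

    /** Maps a join tuple to its partition, exactly as in the example above. */
    int partitionOf(final Object joinTuple) {
        return joinTuple.hashCode() & bitMask;
    }

    /** Doubles the partition count: each pending partition p splits into p and p | newBit. */
    List<Integer> refine(final Collection<Integer> pendingParts) {
        numBits++;
        final int newBit = 1 << (numBits - 1);
        bitMask |= newBit;
        List<Integer> refined = new ArrayList<Integer>();
        for (int p : pendingParts) {
            refined.add(p);
            refined.add(p | newBit);
        }
        return refined;
    }
}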

Aggregations

CloseableRowIterator (org.knime.core.data.container.CloseableRowIterator): 21
DataRow (org.knime.core.data.DataRow): 13
BufferedDataTable (org.knime.core.node.BufferedDataTable): 12
DataCell (org.knime.core.data.DataCell): 8
DataTableSpec (org.knime.core.data.DataTableSpec): 6
ArrayList (java.util.ArrayList): 5
BufferedDataContainer (org.knime.core.node.BufferedDataContainer): 4
Color (java.awt.Color): 3
HashMap (java.util.HashMap): 2
Set (java.util.Set): 2
TreeSet (java.util.TreeSet): 2
DataColumnSpec (org.knime.core.data.DataColumnSpec): 2
DefaultRow (org.knime.core.data.def.DefaultRow): 2
SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString): 2
Interpreter (bsh.Interpreter): 1
JsonNode (com.fasterxml.jackson.databind.JsonNode): 1
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1
IOException (java.io.IOException): 1
BitSet (java.util.BitSet): 1
Collection (java.util.Collection): 1