Search in sources :

Example 1 with MemoryActionIndicator

use of org.knime.core.data.util.memory.MemoryAlertSystem.MemoryActionIndicator in project knime-core by knime.

the class AbstractColumnTableSorterTest method testSortingWithLimitedFileHandler.

/**
 * Runs a column-wise sort on a random table while capping the number of open containers at 60 and using a
 * custom memory indicator, and feeds every pair of consecutive output rows to an order-checking comparator.
 *
 * The memory indicator reports low memory as soon as the used memory exceeds the level measured at test
 * start by more than 100 MB.
 *
 * @throws CanceledExecutionException if the sort is canceled
 * @throws InvalidSettingsException if the sorter rejects the given settings
 */
@Test
public void testSortingWithLimitedFileHandler() throws CanceledExecutionException, InvalidSettingsException {
    final BufferedDataTable inputTable = createRandomTable(50, 5000);
    final ColumnBufferedDataTableSorter sorter = new ColumnBufferedDataTableSorter(
        inputTable.getDataTableSpec(), inputTable.size(), inputTable.getDataTableSpec().getColumnNames());

    // baseline + 100 MB (100 << 20 bytes); everything above counts as "low memory"
    final long memoryLimit = MemoryAlertSystem.getUsedMemory() + (100 << 20);
    sorter.setMemActionIndicator(new MemoryActionIndicator() {

        @Override
        public boolean lowMemoryActionRequired() {
            MemoryAlertSystem.getInstance();
            final long usedNow = MemoryAlertSystem.getUsedMemory();
            return usedNow > memoryLimit;
        }
    });
    sorter.setMaxOpenContainers(60);

    // presumably the comparator raises an assertion failure on a wrongly ordered pair - see its factory method
    final Comparator<DataRow> orderCheck =
        createAscendingOrderAssertingComparator(inputTable, inputTable.getDataTableSpec().getColumnNames());
    sorter.sort(inputTable, m_exec, new SortingConsumer() {

        final AtomicReference<DataRow> previousRow = new AtomicReference<>();

        @Override
        public void consume(final DataRow defaultRow) {
            final DataRow previous = previousRow.get();
            if (previous != null) {
                // the first row has no predecessor; every later row is checked against the one before it
                orderCheck.compare(defaultRow, previous);
            }
            previousRow.set(defaultRow);
        }
    });
}
Also used : MemoryActionIndicator(org.knime.core.data.util.memory.MemoryAlertSystem.MemoryActionIndicator) BufferedDataTable(org.knime.core.node.BufferedDataTable) AtomicReference(java.util.concurrent.atomic.AtomicReference) DataRow(org.knime.core.data.DataRow) MemoryAlertSystemTest(org.knime.core.data.util.memory.MemoryAlertSystemTest) Test(org.junit.Test)

Example 2 with MemoryActionIndicator

use of org.knime.core.data.util.memory.MemoryAlertSystem.MemoryActionIndicator in project knime-core by knime.

the class Joiner method performJoin.

/**
 * Reads the pending partitions of the left table into memory and joins them with the right table.
 *
 * The method starts with a working copy of {@code pendingParts}. While reading the left table, whenever
 * memory is low (or a forced out-of-memory is simulated via {@code m_rowsAddedBeforeForcedOOM}) it either
 * drops half of the non-empty partitions from the working set or, if only one non-empty partition is left,
 * doubles the number of partitions, which leads to smaller partitions. In the latter case
 * {@code pendingParts} is modified in place to reflect the new partition numbering. The partitions that
 * remain in the working set are joined and returned.
 *
 * @param leftTable The left (inner) input table.
 * @param rightTable The right input table.
 * @param outputContainer The container used for storing matches.
 * @param pendingParts The parts that are not processed yet; rewritten when the number of partitions is
 *            increased.
 * @param exec The execution context.
 * @param progressDiff The difference in the progress monitor.
 * @return The partitions that were successfully processed (read + joined).
 * @throws CanceledExecutionException when execution is canceled
 */
private Collection<Integer> performJoin(final BufferedDataTable leftTable, final BufferedDataTable rightTable, final JoinContainer outputContainer, final Collection<Integer> pendingParts, final ExecutionContext exec, final double progressDiff) throws CanceledExecutionException {
    // Update increment for reporting progress
    double progress = exec.getProgressMonitor().getProgress();
    double numRows = leftTable.size() + rightTable.size();
    double inc = (progressDiff - progress) / numRows;
    Collection<Integer> currParts = new ArrayList<Integer>();
    currParts.addAll(pendingParts);
    setMessage("Read", exec, pendingParts, currParts);
    // Partition left table
    Map<Integer, Map<JoinTuple, Set<Integer>>> leftTableHashed = new HashMap<Integer, Map<JoinTuple, Set<Integer>>>();
    // This is only used when m_leftRetain is true and m_matchAny is false.
    // It holds the row indices of the left table that do not match to
    // any row of the right table
    Map<Integer, Set<Integer>> leftOuterJoins = new HashMap<Integer, Set<Integer>>();
    MemoryActionIndicator memIndicator = MemoryAlertSystem.getInstance().newIndicator();
    int counter = 0;
    long rowsAdded = 0;
    CloseableRowIterator leftIter = leftTable.iterator();
    try {
        while (leftIter.hasNext()) {
            exec.checkCanceled();
            // A row is only read if memory is fine and no forced OOM is due for this row count.
            boolean saveToAddMoreRows = !memIndicator.lowMemoryActionRequired() && ((m_rowsAddedBeforeForcedOOM == 0) || (rowsAdded % m_rowsAddedBeforeForcedOOM != (m_rowsAddedBeforeForcedOOM - 1)));
            if (saveToAddMoreRows) {
                DataRow row = leftIter.next();
                InputRow inputDataRow = new InputRow(row, counter, InputRow.Settings.InDataPort.Left, m_inputDataRowSettings);
                for (JoinTuple tuple : inputDataRow.getJoinTuples()) {
                    int partition = tuple.hashCode() & m_bitMask;
                    if (currParts.contains(partition)) {
                        addRow(leftTableHashed, leftOuterJoins, partition, tuple, inputDataRow);
                        rowsAdded++;
                    }
                }
                counter++;
                // report progress
                progress += inc;
                exec.getProgressMonitor().setProgress(progress);
            } else {
                // Advance the counter so that a forced OOM does not trigger again on the next iteration.
                rowsAdded++;
                // Build list of partitions that are not empty
                List<Integer> nonEmptyPartitions = new ArrayList<Integer>();
                for (Integer i : currParts) {
                    if (null != leftTableHashed.get(i)) {
                        nonEmptyPartitions.add(i);
                    }
                }
                int numNonEmpty = nonEmptyPartitions.size();
                if (numNonEmpty > 1) {
                    // remove input partitions to free memory
                    List<Integer> removeParts = new ArrayList<Integer>();
                    for (int i = 0; i < numNonEmpty / 2; i++) {
                        removeParts.add(nonEmptyPartitions.get(i));
                    }
                    // remove collected data of the no longer processed
                    for (int i : removeParts) {
                        leftTableHashed.remove(i);
                        if (m_retainLeft && !m_matchAny) {
                            leftOuterJoins.remove(i);
                        }
                    }
                    currParts.removeAll(removeParts);
                    LOGGER.debug("Skip partitions while reading inner table. Currently Processed: " + currParts + ". Skip: " + removeParts);
                    // update increment for reporting progress
                    numRows += leftTable.size() + rightTable.size();
                    inc = (progressDiff - progress) / numRows;
                    setMessage("Read", exec, pendingParts, currParts);
                } else if (numNonEmpty == 1) {
                    if (m_numBits < m_numBitsMaximal) {
                        LOGGER.debug("Increase number of partitions while reading inner table. Currently Processed: " + nonEmptyPartitions);
                        // increase number of partitions
                        m_numBits = m_numBits + 1;
                        m_bitMask = m_bitMask | (0x0001 << (m_numBits - 1));
                        // Every pending partition i is split into i and i with the new high bit set.
                        Set<Integer> pending = new TreeSet<Integer>();
                        pending.addAll(pendingParts);
                        pendingParts.clear();
                        for (int i : pending) {
                            pendingParts.add(i);
                            int ii = i | (0x0001 << (m_numBits - 1));
                            pendingParts.add(ii);
                        }
                        int currPart = nonEmptyPartitions.iterator().next();
                        currParts.clear();
                        currParts.add(currPart);
                        // update chunk size
                        retainPartitions(leftTableHashed, leftOuterJoins, currPart);
                        // update increment for reporting progress
                        numRows += leftTable.size() + rightTable.size();
                        inc = (progressDiff - progress) / numRows;
                        setMessage("Read", exec, pendingParts, currParts);
                    } else {
                        // The maximal number of partitions is reached.
                        // We can only keep going and hope that other nodes
                        // may free some memory.
                        LOGGER.warn("Memory is low. I have no chance to free memory. This may cause an endless loop.");
                    }
                } else {
                    // We have only empty partitions.
                    // Other nodes consume too much memory,
                    // we cannot free more memory
                    LOGGER.warn("Memory is low. I have no chance to free memory. This may cause an endless loop.");
                }
            }
        }
    } finally {
        // Release the iterator's resources even if the loop is left early, e.g. on cancellation.
        leftIter.close();
    }
    setMessage("Join", exec, pendingParts, currParts);
    // Join with outer table
    joinInMemory(leftTableHashed, leftOuterJoins, currParts, rightTable, outputContainer, exec, inc);
    // Log which parts were successfully joined
    for (int part : currParts) {
        Map<JoinTuple, Set<Integer>> tuplesOfPart = leftTableHashed.get(part);
        int numTuples = tuplesOfPart != null ? tuplesOfPart.values().size() : 0;
        LOGGER.debug("Joined " + part + " with " + numTuples + " tuples.");
    }
    // Garbage collector has problems without this explicit clearance.
    leftTableHashed.clear();
    leftOuterJoins.clear();
    // return successfully joined parts
    return currParts;
}
Also used : TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CloseableRowIterator(org.knime.core.data.container.CloseableRowIterator) DataRow(org.knime.core.data.DataRow) MemoryActionIndicator(org.knime.core.data.util.memory.MemoryAlertSystem.MemoryActionIndicator) HashMap(java.util.HashMap) Map(java.util.Map)

Example 3 with MemoryActionIndicator

use of org.knime.core.data.util.memory.MemoryAlertSystem.MemoryActionIndicator in project knime-core by knime.

the class AbstractTableSorter method createInitialChunks.

/**
 * Reads the input table row by row into an in-memory buffer and writes the buffer out as a sorted chunk
 * whenever a flush is due.
 *
 * A flush is due when the memory indicator requests action and the current chunk holds at least
 * {@code m_maxOpenContainers} rows, or when the number of rows read so far is a multiple of
 * {@code m_maxRowsPerChunk}. Rows still buffered after the last flush are sorted and added to
 * {@code m_chunksContainer}.
 *
 * @param exec monitor used for progress reporting and cancellation checks
 * @param dataTable the table to read
 * @return the number of rows read from {@code dataTable}
 * @throws CanceledExecutionException if the execution is canceled
 */
private long createInitialChunks(final ExecutionMonitor exec, final DataTable dataTable) throws CanceledExecutionException {
    final MemoryActionIndicator lowMemory = m_memService.newIndicator();
    final ArrayList<DataRow> pending = new ArrayList<DataRow>();
    long rowsRead = 0;
    long firstRowOfChunk = 0;
    int rowsSinceLastFlush = 0;
    exec.setMessage("Reading table");
    final Iterator<DataRow> rows = dataTable.iterator();
    while (rows.hasNext()) {
        rowsRead++;
        rowsSinceLastFlush++;
        exec.checkCanceled();
        final String readMessage = "Reading table, " + rowsRead + " rows read";
        if (m_rowsInInputTable > 0) {
            m_progress += m_incProgress;
            exec.setProgress(m_progress, readMessage);
        } else {
            exec.setMessage(readMessage);
        }
        pending.add(rows.next());

        final boolean flushForMemory =
            lowMemory.lowMemoryActionRequired() && rowsSinceLastFlush >= m_maxOpenContainers;
        if (!flushForMemory && rowsRead % m_maxRowsPerChunk != 0) {
            // neither memory pressure nor the row limit asks for a flush yet
            continue;
        }

        final String chunkRange = "[" + firstRowOfChunk + ":" + rowsRead + "]";
        LOGGER.debug("Writing chunk " + chunkRange + " - mem usage: " + getMemUsage());
        if (m_rowsInInputTable > 0) {
            // re-estimate the progress step: rows left to read plus rows about to be written
            long estimatedIncrements = m_rowsInInputTable - rowsRead + pending.size();
            m_incProgress = (0.5 - m_progress) / estimatedIncrements;
        }
        exec.setMessage("Sorting temporary buffer");
        Collections.sort(pending, m_rowComparator);

        // write the sorted buffer to disk
        openChunk();
        final int rowsToWrite = pending.size();
        for (int pos = 0; pos < rowsToWrite; pos++) {
            exec.setMessage("Writing temporary table -- " + pos + "/" + rowsToWrite);
            // null out the slot instead of using Iterator#remove, which causes array copies
            final DataRow rowToWrite = pending.set(pos, null);
            addRowToChunk(rowToWrite);
            exec.checkCanceled();
            if (m_rowsInInputTable > 0) {
                m_progress += m_incProgress;
                exec.setProgress(m_progress);
            }
        }
        pending.clear();
        closeChunk();
        LOGGER.debug("Wrote chunk " + chunkRange + " - mem usage: " + getMemUsage());
        firstRowOfChunk = rowsRead + 1;
        rowsSinceLastFlush = 0;
    }
    // whatever is left in memory becomes a chunk of its own
    if (!pending.isEmpty()) {
        Collections.sort(pending, m_rowComparator);
        m_chunksContainer.add(pending);
    }
    return rowsRead;
}
Also used : MemoryActionIndicator(org.knime.core.data.util.memory.MemoryAlertSystem.MemoryActionIndicator) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow)

Aggregations

DataRow (org.knime.core.data.DataRow)3 MemoryActionIndicator (org.knime.core.data.util.memory.MemoryAlertSystem.MemoryActionIndicator)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Map (java.util.Map)1 Set (java.util.Set)1 TreeSet (java.util.TreeSet)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 Test (org.junit.Test)1 CloseableRowIterator (org.knime.core.data.container.CloseableRowIterator)1 MemoryAlertSystemTest (org.knime.core.data.util.memory.MemoryAlertSystemTest)1 BufferedDataTable (org.knime.core.node.BufferedDataTable)1