use of org.knime.core.data.util.memory.MemoryAlertSystem.MemoryActionIndicator in project knime-core by knime.
the class AbstractColumnTableSorterTest method testSortingWithLimitedFileHandler.
/**
 * Tests sorting when the number of available file handles is limited.
 *
 * @throws CanceledExecutionException if the execution is canceled
 * @throws InvalidSettingsException if the sorter settings are invalid
 */
@Test
public void testSortingWithLimitedFileHandler() throws CanceledExecutionException, InvalidSettingsException {
    BufferedDataTable bt = createRandomTable(50, 5000);
    ColumnBufferedDataTableSorter dataTableSorter =
        new ColumnBufferedDataTableSorter(bt.getDataTableSpec(), bt.size(), bt.getDataTableSpec().getColumnNames());
    // trigger the low-memory action once usage grows more than 100 MB (100 << 20 bytes) beyond the current level
    long usageThreshold = MemoryAlertSystem.getUsedMemory() + (100 << 20);
    MemoryActionIndicator memIndicator = new MemoryActionIndicator() {

        @Override
        public boolean lowMemoryActionRequired() {
            MemoryAlertSystem.getInstance(); // ensure the alert system singleton is initialized
            return MemoryAlertSystem.getUsedMemory() > usageThreshold;
        }
    };
    dataTableSorter.setMemActionIndicator(memIndicator);
    dataTableSorter.setMaxOpenContainers(60);
    final Comparator<DataRow> ascendingOrderAssertion =
        createAscendingOrderAssertingComparator(bt, bt.getDataTableSpec().getColumnNames());
    dataTableSorter.sort(bt, m_exec, new SortingConsumer() {

        final AtomicReference<DataRow> lastRow = new AtomicReference<>();

        @Override
        public void consume(final DataRow defaultRow) {
            // the comparator asserts ascending order as a side effect of compare()
            if (lastRow.get() != null) {
                ascendingOrderAssertion.compare(defaultRow, lastRow.get());
            }
            lastRow.set(defaultRow);
        }
    });
}
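A custom indicator like the one above makes the sorter's spill behavior deterministic in tests. The simplest variant forces the disk-based code path on every check; a minimal sketch, reusing the dataTableSorter from the test above (the always-true indicator is illustrative, not part of knime-core):

// Force the sorter onto its disk-based code path on every check.
MemoryActionIndicator alwaysLow = new MemoryActionIndicator() {

    @Override
    public boolean lowMemoryActionRequired() {
        return true; // pretend memory is always scarce
    }
};
dataTableSorter.setMemActionIndicator(alwaysLow);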
use of org.knime.core.data.util.memory.MemoryAlertSystem.MemoryActionIndicator in project knime-core by knime.
the class Joiner method performJoin.
/**
 * This method starts by reading the partitions of the left table listed
 * in pendingParts. If memory runs low, partitions are skipped, or the
 * number of partitions is raised, which leads to smaller partitions.
 * Successfully read partitions are then joined. The returned collection
 * contains the successfully processed partitions.
 *
 * @param leftTable The left input table (the inner, hashed side of the join).
 * @param rightTable The right input table.
 * @param outputContainer The container used for storing matches.
 * @param pendingParts The partitions that have not been processed yet.
 * @param exec The execution context.
 * @param progressDiff The progress value to reach by the end of this call.
 * @return The partitions that were successfully processed (read + joined).
 * @throws CanceledExecutionException when execution is canceled
 */
private Collection<Integer> performJoin(final BufferedDataTable leftTable, final BufferedDataTable rightTable,
    final JoinContainer outputContainer, final Collection<Integer> pendingParts, final ExecutionContext exec,
    final double progressDiff) throws CanceledExecutionException {
    // Update increment for reporting progress
    double progress = exec.getProgressMonitor().getProgress();
    double numRows = leftTable.size() + rightTable.size();
    double inc = (progressDiff - progress) / numRows;
    Collection<Integer> currParts = new ArrayList<Integer>();
    currParts.addAll(pendingParts);
    setMessage("Read", exec, pendingParts, currParts);
    // Partition the left table by hashing its join tuples
    Map<Integer, Map<JoinTuple, Set<Integer>>> leftTableHashed = new HashMap<Integer, Map<JoinTuple, Set<Integer>>>();
    // This is only used when m_retainLeft is true and m_matchAny is false.
    // It holds the row indices of the left table that do not match
    // any row of the right table.
    Map<Integer, Set<Integer>> leftOuterJoins = new HashMap<Integer, Set<Integer>>();
    MemoryActionIndicator memIndicator = MemoryAlertSystem.getInstance().newIndicator();
    int counter = 0;
    long rowsAdded = 0;
    CloseableRowIterator leftIter = leftTable.iterator();
    while (leftIter.hasNext()) {
        exec.checkCanceled();
        boolean safeToAddMoreRows = !memIndicator.lowMemoryActionRequired()
            && ((m_rowsAddedBeforeForcedOOM == 0)
                || (rowsAdded % m_rowsAddedBeforeForcedOOM != (m_rowsAddedBeforeForcedOOM - 1)));
        if (safeToAddMoreRows) {
            DataRow row = leftIter.next();
            InputRow inputDataRow = new InputRow(row, counter, InputRow.Settings.InDataPort.Left, m_inputDataRowSettings);
            for (JoinTuple tuple : inputDataRow.getJoinTuples()) {
                int partition = tuple.hashCode() & m_bitMask;
                if (currParts.contains(partition)) {
                    addRow(leftTableHashed, leftOuterJoins, partition, tuple, inputDataRow);
                    rowsAdded++;
                }
            }
            counter++;
            // report progress
            progress += inc;
            exec.getProgressMonitor().setProgress(progress);
        } else {
            rowsAdded++;
            // Build a list of partitions that are not empty
            List<Integer> nonEmptyPartitions = new ArrayList<Integer>();
            for (Integer i : currParts) {
                if (null != leftTableHashed.get(i)) {
                    nonEmptyPartitions.add(i);
                }
            }
            int numNonEmpty = nonEmptyPartitions.size();
            if (numNonEmpty > 1) {
                // remove half of the input partitions to free memory
                List<Integer> removeParts = new ArrayList<Integer>();
                for (int i = 0; i < numNonEmpty / 2; i++) {
                    removeParts.add(nonEmptyPartitions.get(i));
                }
                // drop the collected data of the partitions that are no longer processed
                for (int i : removeParts) {
                    leftTableHashed.remove(i);
                    if (m_retainLeft && !m_matchAny) {
                        leftOuterJoins.remove(i);
                    }
                }
                currParts.removeAll(removeParts);
                LOGGER.debug("Skip partitions while reading inner table. Currently processed: "
                    + currParts + ". Skip: " + removeParts);
                // update increment for reporting progress
                numRows += leftTable.size() + rightTable.size();
                inc = (progressDiff - progress) / numRows;
                setMessage("Read", exec, pendingParts, currParts);
            } else if (nonEmptyPartitions.size() == 1) {
                if (m_numBits < m_numBitsMaximal) {
                    LOGGER.debug("Increase number of partitions while reading inner table. Currently processed: "
                        + nonEmptyPartitions);
                    // double the number of partitions by adding one bit to the mask
                    m_numBits = m_numBits + 1;
                    m_bitMask = m_bitMask | (0x0001 << (m_numBits - 1));
                    Set<Integer> pending = new TreeSet<Integer>();
                    pending.addAll(pendingParts);
                    pendingParts.clear();
                    for (int i : pending) {
                        pendingParts.add(i);
                        int ii = i | (0x0001 << (m_numBits - 1));
                        pendingParts.add(ii);
                    }
                    int currPart = nonEmptyPartitions.iterator().next();
                    currParts.clear();
                    currParts.add(currPart);
                    // re-partition the rows collected so far
                    retainPartitions(leftTableHashed, leftOuterJoins, currPart);
                    // update increment for reporting progress
                    numRows += leftTable.size() + rightTable.size();
                    inc = (progressDiff - progress) / numRows;
                    setMessage("Read", exec, pendingParts, currParts);
                } else {
                    // The maximal number of partitions is reached.
                    // We can only keep going and hope that other nodes
                    // may free some memory.
                    LOGGER.warn("Memory is low. I have no chance to free memory. This may cause an endless loop.");
                }
            } else if (nonEmptyPartitions.size() < 1) {
                // We have only empty partitions. Other nodes consume too
                // much memory; we cannot free any more.
                LOGGER.warn("Memory is low. I have no chance to free memory. This may cause an endless loop.");
            }
        }
    }
    setMessage("Join", exec, pendingParts, currParts);
    // Join with the right (outer) table
    joinInMemory(leftTableHashed, leftOuterJoins, currParts, rightTable, outputContainer, exec, inc);
    // Log which parts were successfully joined
    for (int part : currParts) {
        int numTuples = leftTableHashed.get(part) != null ? leftTableHashed.get(part).values().size() : 0;
        LOGGER.debug("Joined " + part + " with " + numTuples + " tuples.");
    }
    // Clear the maps explicitly to help the garbage collector
    leftTableHashed.clear();
    leftOuterJoins.clear();
    // return the successfully joined parts
    return currParts;
}
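The partition-doubling branch above relies on simple bit-mask arithmetic: a row's partition is the low m_numBits bits of its join-tuple hash, so adding one bit to the mask splits every partition i into i and i | (1 << (m_numBits - 1)), and rows never move outside that pair. A standalone sketch of the arithmetic (class and variable names are illustrative, not from knime-core):

final class PartitionMaskSketch {

    static int partitionOf(final int hash, final int bitMask) {
        return hash & bitMask;
    }

    public static void main(final String[] args) {
        int numBits = 2;
        int bitMask = (1 << numBits) - 1;  // 0b011: partitions 0..3
        int hash = "someJoinTuple".hashCode();
        int before = partitionOf(hash, bitMask);

        numBits++;                         // double the partition count
        bitMask |= 1 << (numBits - 1);     // 0b111: partitions 0..7
        int after = partitionOf(hash, bitMask);

        // 'after' is either 'before' or 'before | 0b100': a row stays
        // within the pair created by splitting its old partition
        System.out.println(before + " -> " + after);
    }
}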
use of org.knime.core.data.util.memory.MemoryAlertSystem.MemoryActionIndicator in project knime-core by knime.
the class AbstractTableSorter method createInitialChunks.
private long createInitialChunks(final ExecutionMonitor exec, final DataTable dataTable) throws CanceledExecutionException {
    long outerCounter;
    long counter = 0;
    ArrayList<DataRow> buffer = new ArrayList<DataRow>();
    long chunkStartRow = 0;
    int rowsInCurrentChunk = 0;
    MemoryActionIndicator memObservable = m_memService.newIndicator();
    exec.setMessage("Reading table");
    for (Iterator<DataRow> iter = dataTable.iterator(); iter.hasNext();) {
        counter++;
        rowsInCurrentChunk++;
        exec.checkCanceled();
        String message = "Reading table, " + counter + " rows read";
        if (m_rowsInInputTable > 0) {
            m_progress += m_incProgress;
            exec.setProgress(m_progress, message);
        } else {
            exec.setMessage(message);
        }
        DataRow row = iter.next();
        buffer.add(row);
        if ((memObservable.lowMemoryActionRequired() && (rowsInCurrentChunk >= m_maxOpenContainers))
            || (counter % m_maxRowsPerChunk == 0)) {
            LOGGER.debug("Writing chunk [" + chunkStartRow + ":" + counter + "] - mem usage: " + getMemUsage());
            if (m_rowsInInputTable > 0) {
                long estimatedIncrements = m_rowsInInputTable - counter + buffer.size();
                m_incProgress = (0.5 - m_progress) / estimatedIncrements;
            }
            exec.setMessage("Sorting temporary buffer");
            // sort the in-memory buffer
            Collections.sort(buffer, m_rowComparator);
            // write the buffer to disk
            openChunk();
            final int totalBufferSize = buffer.size();
            for (int i = 0; i < totalBufferSize; i++) {
                exec.setMessage("Writing temporary table -- " + i + "/" + totalBufferSize);
                // must not use Iterator#remove as it causes array copies;
                // set(i, null) releases the row for garbage collection instead
                DataRow next = buffer.set(i, null);
                addRowToChunk(next);
                exec.checkCanceled();
                if (m_rowsInInputTable > 0) {
                    m_progress += m_incProgress;
                    exec.setProgress(m_progress);
                }
            }
            buffer.clear();
            closeChunk();
            LOGGER.debug("Wrote chunk [" + chunkStartRow + ":" + counter + "] - mem usage: " + getMemUsage());
            chunkStartRow = counter + 1;
            rowsInCurrentChunk = 0;
        }
    }
    // Keep any remaining rows as a final in-memory chunk
    if (!buffer.isEmpty()) {
        // sort buffer
        Collections.sort(buffer, m_rowComparator);
        m_chunksContainer.add(buffer);
    }
    outerCounter = counter;
    return outerCounter;
}
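createInitialChunks is the run-creation phase of an external merge sort: rows accumulate in memory until the indicator fires (or the hard per-chunk row limit is hit), then the buffer is sorted and spilled as one sorted chunk. A condensed sketch of that pattern follows; writeSortedRun stands in for the openChunk()/addRowToChunk()/closeChunk() sequence and, like the parameter names, is hypothetical:

private void createRuns(final Iterable<DataRow> table, final Comparator<DataRow> rowComparator,
    final MemoryActionIndicator indicator, final int minRunSize) {
    List<DataRow> buffer = new ArrayList<>();
    for (DataRow row : table) {
        buffer.add(row);
        if (indicator.lowMemoryActionRequired() && buffer.size() >= minRunSize) {
            Collections.sort(buffer, rowComparator); // sort the in-memory run
            writeSortedRun(buffer);                  // spill it to disk as one chunk (hypothetical helper)
            buffer.clear();
        }
    }
    if (!buffer.isEmpty()) {
        Collections.sort(buffer, rowComparator);     // the last run can stay in memory
    }
}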