use of org.knime.core.data.container.CloseableRowIterator in project knime-core by knime.
the class BootstrapNodeModel method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Init random object
    long seed = m_configuration.getUseSeed() ? m_configuration.getSeed() : System.currentTimeMillis();
    Random random = new Random(seed);
    // Create containers for output tables
    BufferedDataContainer bootstrap = exec.createDataContainer(getSamplesSpec(inData[0].getDataTableSpec()));
    BufferedDataContainer holdout = exec.createDataContainer(inData[0].getDataTableSpec());
    // Create iterator over all rows
    CloseableRowIterator iterator = inData[0].iterator();
    int numberOfRows = inData[0].getRowCount();
    // Init unprocessed rows with the total number of rows
    int unprocessedRows = numberOfRows;
    // Create progress object with the total number of rows
    Progress progress = new Progress(numberOfRows, exec);
    // Calculate number of samples
    int numberOfSamples;
    if (m_configuration.getInPercent()) {
        numberOfSamples = Math.round(numberOfRows * (m_configuration.getPercent() / 100));
    } else {
        numberOfSamples = m_configuration.getSize();
    }
    // Loop until every row has been processed
    while (unprocessedRows > 0) {
        int chunkSize;
        int numberOfChunkSamples;
        // The last chunk covers whatever rows are left
        // and will take care of fixing rounding issues
        if (unprocessedRows > MAX_CHUNK_SIZE) {
            // Set to biggest allowed size
            chunkSize = MAX_CHUNK_SIZE;
            // Calculate the number of samples relative to the size of this chunk
            numberOfChunkSamples = Math.round((chunkSize / (float) numberOfRows) * numberOfSamples);
        } else {
            // Make this chunk as big as there are rows left
            chunkSize = unprocessedRows;
            // Generate the rest of the samples
            // (this takes care of rounding errors that may occur in the relative calculation);
            // we never put more than 2^31 rows into the bootstrap container, therefore it's safe to cast to int
            numberOfChunkSamples = numberOfSamples - (int) bootstrap.size();
        }
        // Sample this chunk
        sampleChunk(iterator, chunkSize, numberOfChunkSamples, bootstrap, holdout, random, progress);
        // Mark chunked rows as processed
        unprocessedRows -= chunkSize;
    }
    iterator.close();
    bootstrap.close();
    holdout.close();
    return new BufferedDataTable[] { bootstrap.getTable(), holdout.getTable() };
}
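The chunking arithmetic above hands each full chunk a proportional share of the requested samples and lets the last chunk absorb whatever is left, so rounding errors cannot accumulate. The stand-alone sketch below replays only that arithmetic; ChunkSampleMath, totalRows, totalSamples and maxChunkSize are illustrative names and not part of the KNIME API.

// Sketch: proportional per-chunk sample counts with a remainder-absorbing last chunk.
public final class ChunkSampleMath {
    public static void main(String[] args) {
        int totalRows = 1_000_001;   // illustrative values
        int totalSamples = 300_000;
        int maxChunkSize = 100_000;

        int unprocessed = totalRows;
        long alreadySampled = 0;
        while (unprocessed > 0) {
            int chunkSize;
            int chunkSamples;
            if (unprocessed > maxChunkSize) {
                chunkSize = maxChunkSize;
                // proportional share for a full chunk
                chunkSamples = Math.round((chunkSize / (float) totalRows) * totalSamples);
            } else {
                chunkSize = unprocessed;
                // the last chunk takes whatever is still missing
                chunkSamples = totalSamples - (int) alreadySampled;
            }
            alreadySampled += chunkSamples;
            unprocessed -= chunkSize;
        }
        System.out.println("sampled " + alreadySampled + " of " + totalSamples);
    }
}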
use of org.knime.core.data.container.CloseableRowIterator in project knime-core by knime.
the class HiliteFilterNodeModel method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    DataTableSpec inSpec = inData[0].getDataTableSpec();
    BufferedDataContainer bufIn = exec.createDataContainer(inSpec);
    BufferedDataContainer bufOut = exec.createDataContainer(inSpec);
    synchronized (m_inHdl) {
        double rowCnt = inData[0].size();
        CloseableRowIterator it = inData[0].iterator();
        for (long i = 0; i < rowCnt; i++) {
            DataRow row = it.next();
            if (m_inHdl.isHiLit(row.getKey())) {
                bufIn.addRowToTable(row);
            } else {
                bufOut.addRowToTable(row);
            }
            exec.checkCanceled();
            exec.setProgress((i + 1) / rowCnt);
        }
    }
    bufIn.close();
    bufOut.close();
    m_inHdl.addHiLiteListener(this);
    return new BufferedDataTable[] { bufIn.getTable(), bufOut.getTable() };
}
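The loop above relies on consuming exactly rowCnt rows and never calls it.close(). When an iterator might not be fully exhausted (early exit, exceptions), closing it in a finally block is the safer pattern. This is only a hedged sketch; processRow is a hypothetical placeholder, not a KNIME method.

// Sketch: always release the CloseableRowIterator, even on early exit or exception.
CloseableRowIterator it = inData[0].iterator();
try {
    while (it.hasNext()) {
        processRow(it.next()); // hypothetical per-row callback
    }
} finally {
    it.close();
}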
use of org.knime.core.data.container.CloseableRowIterator in project knime-core by knime.
the class FixedWidthFRTable method iterator.
/**
 * {@inheritDoc}
 */
@Override
public CloseableRowIterator iterator() {
    try {
        synchronized (m_iterators) {
            CloseableRowIterator i = createRowIterator(m_nodeSettings, m_tableSpec, m_exec);
            m_iterators.add(new WeakReference<CloseableRowIterator>(i));
            return i;
        }
    } catch (IOException ioe) {
        LOGGER.error("I/O Error occurred while trying to open a stream to '" + m_nodeSettings.getFileLocation().toString() + "'.");
    }
    return null;
}
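The WeakReference bookkeeping above lets the table later close any iterators that are still alive, while iterators that were already garbage collected simply drop out of the list. The self-contained sketch below illustrates that idea in plain Java; the class TrackedIterators and its methods are illustrative, only the WeakReference pattern mirrors the snippet.

// Sketch: hand out iterators, remember them weakly, close the survivors on dispose().
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public final class TrackedIterators {
    private final List<WeakReference<AutoCloseable>> m_iterators = new ArrayList<>();

    public synchronized <T extends AutoCloseable> T track(final T iterator) {
        m_iterators.add(new WeakReference<AutoCloseable>(iterator));
        return iterator;
    }

    /** Close every tracked iterator that has not been garbage collected yet. */
    public synchronized void dispose() {
        for (Iterator<WeakReference<AutoCloseable>> it = m_iterators.iterator(); it.hasNext();) {
            AutoCloseable open = it.next().get();
            if (open != null) {
                try {
                    open.close();
                } catch (Exception e) {
                    // best-effort cleanup
                }
            }
            it.remove();
        }
    }
}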
use of org.knime.core.data.container.CloseableRowIterator in project GenericKnimeNodes by genericworkflownodes.
the class BeanShellNodeModel method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataContainer container1 = null;
    Interpreter ip = new Interpreter();
    ip.eval(script_init);
    CloseableRowIterator iter = null;
    // First pass: evaluate the first-pass script for every input row (e.g. to collect statistics)
    if (!script_firstPass.equals("")) {
        iter = inData[0].iterator();
        while (iter.hasNext()) {
            ip.set("INROW", fillInRow(iter.next()));
            ip.eval(script_firstPass);
        }
        iter.close();
    }
    // Second pass: evaluate the second-pass script and build the output table
    iter = inData[0].iterator();
    int idx = 1;
    boolean first = true;
    while (iter.hasNext()) {
        ip.set("OUTROW", new OutRow());
        ip.set("INROW", fillInRow(iter.next()));
        ip.eval(script_secondPass);
        OutRow out = (OutRow) ip.get("OUTROW");
        if (out.isNull()) {
            continue;
        }
        if (first) {
            // The container is created lazily, once the first output row defines the table spec
            container1 = exec.createDataContainer(getDataTableSpec2(out));
            first = false;
        }
        List<Object> values = out.getValues();
        int N = values.size();
        int i = 0;
        DataCell[] cells = new DataCell[N];
        for (Object value : values) {
            cells[i++] = getCell(value);
        }
        DefaultRow row = new DefaultRow("Row " + idx++, cells);
        container1.addRowToTable(row);
    }
    iter.close();
    container1.close();
    BufferedDataTable out1 = container1.getTable();
    return new BufferedDataTable[] { out1 };
}
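Because container1 is created lazily from the first output row, it stays null when the second-pass script never produces any output, and container1.close() would then fail with a NullPointerException. A hedged sketch of a guard for the final lines of the method above (the exception type and message are illustrative, not part of this node's code):

iter.close();
// Sketch: fail with a clear message instead of a NullPointerException
// when the script produced no output rows at all.
if (container1 == null) {
    throw new IllegalStateException("The second-pass script did not produce any output rows.");
}
container1.close();
return new BufferedDataTable[] { container1.getTable() };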
use of org.knime.core.data.container.CloseableRowIterator in project knime-core by knime.
the class Joiner method performJoin.
/**
 * This method starts by reading the partitions of the left table defined
 * in currParts. If memory is low, partitions will be skipped or the
 * number of partitions will be raised, which leads to smaller partitions.
 * Successfully read partitions will then be joined. The returned collection
 * contains the successfully processed partitions.
 *
 * @param leftTable The inner input table.
 * @param rightTable The right input table.
 * @param outputContainer The container used for storing matches.
 * @param pendingParts The parts that are not processed yet.
 * @param exec The execution context.
 * @param progressDiff The difference in the progress monitor.
 * @return The partitions that were successfully processed (read + joined).
 * @throws CanceledExecutionException when execution is canceled
 */
private Collection<Integer> performJoin(final BufferedDataTable leftTable, final BufferedDataTable rightTable, final JoinContainer outputContainer, final Collection<Integer> pendingParts, final ExecutionContext exec, final double progressDiff) throws CanceledExecutionException {
    // Update increment for reporting progress
    double progress = exec.getProgressMonitor().getProgress();
    double numRows = leftTable.size() + rightTable.size();
    double inc = (progressDiff - progress) / numRows;
    Collection<Integer> currParts = new ArrayList<Integer>();
    currParts.addAll(pendingParts);
    setMessage("Read", exec, pendingParts, currParts);
    // Partition the left table
    Map<Integer, Map<JoinTuple, Set<Integer>>> leftTableHashed = new HashMap<Integer, Map<JoinTuple, Set<Integer>>>();
    // This is only used when m_leftRetain is true and m_matchAny is false.
    // It holds the row indices of the left table that do not match to
    // any row of the right table.
    Map<Integer, Set<Integer>> leftOuterJoins = new HashMap<Integer, Set<Integer>>();
    MemoryActionIndicator memIndicator = MemoryAlertSystem.getInstance().newIndicator();
    int counter = 0;
    long rowsAdded = 0;
    CloseableRowIterator leftIter = leftTable.iterator();
    while (leftIter.hasNext()) {
        exec.checkCanceled();
        boolean saveToAddMoreRows = !memIndicator.lowMemoryActionRequired()
                && ((m_rowsAddedBeforeForcedOOM == 0) || (rowsAdded % m_rowsAddedBeforeForcedOOM != (m_rowsAddedBeforeForcedOOM - 1)));
        if (saveToAddMoreRows) {
            DataRow row = leftIter.next();
            InputRow inputDataRow = new InputRow(row, counter, InputRow.Settings.InDataPort.Left, m_inputDataRowSettings);
            for (JoinTuple tuple : inputDataRow.getJoinTuples()) {
                int partition = tuple.hashCode() & m_bitMask;
                if (currParts.contains(partition)) {
                    addRow(leftTableHashed, leftOuterJoins, partition, tuple, inputDataRow);
                    rowsAdded++;
                }
            }
            counter++;
            // report progress
            progress += inc;
            exec.getProgressMonitor().setProgress(progress);
        } else {
            rowsAdded++;
            // Build a list of the partitions that are not empty
            List<Integer> nonEmptyPartitions = new ArrayList<Integer>();
            for (Integer i : currParts) {
                if (null != leftTableHashed.get(i)) {
                    nonEmptyPartitions.add(i);
                }
            }
            int numNonEmpty = nonEmptyPartitions.size();
            if (numNonEmpty > 1) {
                // remove input partitions to free memory
                List<Integer> removeParts = new ArrayList<Integer>();
                for (int i = 0; i < numNonEmpty / 2; i++) {
                    removeParts.add(nonEmptyPartitions.get(i));
                }
                // remove the collected data of the partitions that are no longer processed
                for (int i : removeParts) {
                    leftTableHashed.remove(i);
                    if (m_retainLeft && !m_matchAny) {
                        leftOuterJoins.remove(i);
                    }
                }
                currParts.removeAll(removeParts);
                LOGGER.debug("Skip partitions while reading inner table. Currently processed: " + currParts + ". Skip: " + removeParts);
                // update increment for reporting progress
                numRows += leftTable.size() + rightTable.size();
                inc = (progressDiff - progress) / numRows;
                setMessage("Read", exec, pendingParts, currParts);
            } else if (nonEmptyPartitions.size() == 1) {
                if (m_numBits < m_numBitsMaximal) {
                    LOGGER.debug("Increase number of partitions while reading inner table. Currently processed: " + nonEmptyPartitions);
                    // increase the number of partitions
                    m_numBits = m_numBits + 1;
                    m_bitMask = m_bitMask | (0x0001 << (m_numBits - 1));
                    Set<Integer> pending = new TreeSet<Integer>();
                    pending.addAll(pendingParts);
                    pendingParts.clear();
                    for (int i : pending) {
                        pendingParts.add(i);
                        int ii = i | (0x0001 << (m_numBits - 1));
                        pendingParts.add(ii);
                    }
                    int currPart = nonEmptyPartitions.iterator().next();
                    currParts.clear();
                    currParts.add(currPart);
                    // update chunk size
                    retainPartitions(leftTableHashed, leftOuterJoins, currPart);
                    // update increment for reporting progress
                    numRows += leftTable.size() + rightTable.size();
                    inc = (progressDiff - progress) / numRows;
                    setMessage("Read", exec, pendingParts, currParts);
                } else {
                    // We now have 2^32 partitions.
                    // We can only keep going and hope that other nodes
                    // may free some memory.
                    LOGGER.warn("Memory is low. I have no chance to free memory. This may cause an endless loop.");
                }
            } else if (nonEmptyPartitions.size() < 1) {
                // We have only empty partitions.
                // Other nodes consume too much memory;
                // we cannot free more memory.
                LOGGER.warn("Memory is low. I have no chance to free memory. This may cause an endless loop.");
            }
        }
    }
    setMessage("Join", exec, pendingParts, currParts);
    // Join with the right (outer) table
    joinInMemory(leftTableHashed, leftOuterJoins, currParts, rightTable, outputContainer, exec, inc);
    // Log which parts were successfully joined
    for (int part : currParts) {
        int numTuples = leftTableHashed.get(part) != null ? leftTableHashed.get(part).values().size() : 0;
        LOGGER.debug("Joined " + part + " with " + numTuples + " tuples.");
    }
    // The garbage collector has problems without this explicit clearance.
    leftTableHashed.clear();
    leftOuterJoins.clear();
    // return the successfully joined parts
    return currParts;
}
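performJoin() assigns each join tuple to partition tuple.hashCode() & m_bitMask, and when memory runs low it widens the mask by one bit, splitting every pending partition id i into i and i | (1 << (numBits - 1)). The stand-alone sketch below replays that partitioning scheme outside of KNIME; the class and variable names are illustrative.

// Sketch: hash partitioning with a bit mask, as used by performJoin() above.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public final class BitMaskPartitioning {
    public static void main(String[] args) {
        int numBits = 2;
        int bitMask = (1 << numBits) - 1;          // 0b11 -> 4 partitions
        List<String> keys = Arrays.asList("a", "b", "c", "d", "e", "f");

        Map<Integer, List<String>> partitions = new HashMap<>();
        for (String k : keys) {
            int part = k.hashCode() & bitMask;
            partitions.computeIfAbsent(part, p -> new ArrayList<>()).add(k);
        }
        System.out.println("4 partitions: " + partitions);

        // "Memory is low": widen the mask by one bit, as in the m_numBits / m_bitMask
        // update above. A key previously in partition i now lands in either i or
        // i | (1 << (numBits - 1)), which is why each pending partition id is split in two.
        numBits++;
        bitMask |= 1 << (numBits - 1);
        Map<Integer, List<String>> finer = new HashMap<>();
        for (String k : keys) {
            finer.computeIfAbsent(k.hashCode() & bitMask, p -> new ArrayList<>()).add(k);
        }
        System.out.println("8 partitions: " + finer);
    }
}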