use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.
the class SubsetMatcherNodeModel method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // --- first input: the table holding the subsets to look for ---
    final BufferedDataTable subsetTable = inData[0];
    final DataTableSpec subsetSpec = subsetTable.getSpec();
    final int subsetColIdx = subsetSpec.findColumnIndex(m_subsetCol.getStringValue());
    final DataColumnSpec subsetColSpec = subsetSpec.getColumnSpec(subsetColIdx);
    // one comparator is shared for sorting the subsets AND the sets
    final Comparator<DataCell> comparator = subsetColSpec.getType().getComparator();

    // --- second input: the table holding the sets that get searched ---
    final BufferedDataTable setTable = inData[1];
    final DataTableSpec setSpec = setTable.getSpec();
    final boolean idFromRowKey = m_setIDCol.useRowID();
    // a negative index marks "use the row key as set id" further down
    final int setIDColIdx = idFromRowKey ? -1 : setSpec.findColumnIndex(m_setIDCol.getStringValue());
    final DataColumnSpec setIDSpec = idFromRowKey ? null : setSpec.getColumnSpec(setIDColIdx);
    final int transColIdx = setSpec.findColumnIndex(m_setCol.getStringValue());
    final boolean appendSetCol = m_appendSetListCol.getBooleanValue();

    // --- output container ---
    final DataTableSpec resultSpec =
        createTableSpec(setIDSpec, setSpec.getColumnSpec(transColIdx), subsetColSpec, appendSetCol);
    m_dc = exec.createDataContainer(resultSpec);

    // nothing to do if either input is empty: return the empty container
    final long subsetRowCount = subsetTable.size();
    if (subsetRowCount == 0) {
        setWarningMessage("Empty subset table found");
        m_dc.close();
        return new BufferedDataTable[] { m_dc.getTable() };
    }
    final long setRowCount = setTable.size();
    if (setRowCount == 0) {
        setWarningMessage("Empty set table found");
        m_dc.close();
        return new BufferedDataTable[] { m_dc.getTable() };
    }

    // --- phase 1: build the sorted matchers from the subset table ---
    final double totalRowCount = subsetRowCount + setRowCount * SET_PROCESSING_FACTOR;
    final ExecutionMonitor subsetExec = exec.createSubProgress(subsetRowCount / totalRowCount);
    exec.setMessage("Generating subset base...");
    final SubsetMatcher[] sortedMatcher = createSortedMatcher(subsetExec, subsetTable, subsetColIdx, comparator);
    subsetExec.setProgress(1.0);
    if (sortedMatcher.length < 1) {
        setWarningMessage("No item sets found");
        m_dc.close();
        return new BufferedDataTable[] { m_dc.getTable() };
    }

    // --- phase 2: enqueue one matching job per usable set ---
    final ExecutionMonitor setExec = exec.createSubProgress((setRowCount * SET_PROCESSING_FACTOR) / totalRowCount);
    exec.setMessage("Processing sets... ");
    final ThreadPool pool = KNIMEConstants.GLOBAL_THREAD_POOL.createSubPool(1);
    for (final DataRow row : setTable) {
        exec.checkCanceled();
        final DataCell setIDCell =
            setIDColIdx < 0 ? new StringCell(row.getKey().getString()) : row.getCell(setIDColIdx);
        final DataCell setCell = row.getCell(transColIdx);
        // only non-empty collection cells can be matched; everything else is skipped
        final boolean usable = setCell instanceof CollectionDataValue && ((CollectionDataValue) setCell).size() >= 1;
        if (!usable) {
            setExec.setProgress(m_setCounter.incrementAndGet() / (double) setRowCount);
            m_skipCounter.incrementAndGet();
            continue;
        }
        pool.enqueue(createRunnable(setExec, setRowCount, setIDCell, (CollectionDataValue) setCell, appendSetCol,
            comparator, sortedMatcher, m_maxMismatches.getIntValue()));
    }

    // all jobs have to be done before the container may be closed
    pool.waitForTermination();
    exec.setMessage("Creating data table...");
    m_dc.close();
    if (m_skipCounter.intValue() > 0) {
        setWarningMessage("No matching subsets found for " + m_skipCounter + " out of " + setRowCount + " sets");
    }
    exec.setProgress(1.0);
    return new BufferedDataTable[] { m_dc.getTable() };
}
use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.
the class UngroupOperation method compute.
/**
 * Performs the ungroup operation on the given row input and pushes the result to the row output.
 * <p>
 * For every input row the cells at the configured column indices ({@code m_colIndices}) are inspected. Cells that
 * are {@link CollectionDataValue}s are iterated in parallel; each round of the iteration yields one output row whose
 * key is the original row key followed by {@code "_"} and a running number starting at 1. Rows in which none of the
 * inspected cells is a collection are pushed once with their original key (unless {@code m_skipMissingValues} is
 * set). If hiliting is enabled, the mapping from original row keys to generated row keys is handed to
 * {@code m_trans} at the end.
 *
 * @param in the row input, will NOT be closed when finished
 * @param out the row output, will NOT be closed when finished
 * @param exec the execution context to check cancellation and (optional) progress logging
 * @param rowCount row count to track the progress or <code>-1</code> without progress tracking
 * @throws Exception if processing fails; presumably also raised by {@code exec.checkCanceled()} on cancellation
 * @since 3.2
 */
public void compute(final RowInput in, final RowOutput out, final ExecutionContext exec, final long rowCount) throws Exception {
// original row key -> keys of the rows that were generated from it (only filled if m_enableHilite is set)
final Map<RowKey, Set<RowKey>> hiliteMapping = new HashMap<RowKey, Set<RowKey>>();
// one iterator slot per configured column; reused for every input row
@SuppressWarnings("unchecked") Iterator<DataCell>[] iterators = new Iterator[m_colIndices.length];
// replacement values used for rows that contain no collection at all
final DataCell[] missingCells = new DataCell[m_colIndices.length];
Arrays.fill(missingCells, DataType.getMissingCell());
long rowCounter = 0;
DataRow row = null;
while ((row = in.poll()) != null) {
rowCounter++;
exec.checkCanceled();
// progress is only reported when a positive row count was supplied
if (rowCount > 0) {
exec.setProgress(rowCounter / (double) rowCount, "Processing row " + rowCounter + " of " + rowCount);
}
// stays true as long as none of the inspected cells is a collection
boolean allMissing = true;
for (int i = 0, length = m_colIndices.length; i < length; i++) {
final DataCell cell = row.getCell(m_colIndices[i]);
final CollectionDataValue listCell;
final Iterator<DataCell> iterator;
if (cell instanceof CollectionDataValue) {
listCell = (CollectionDataValue) cell;
iterator = listCell.iterator();
allMissing = false;
} else {
// a null iterator marks a column whose cell is not a collection (e.g. a missing cell)
iterator = null;
}
iterators[i] = iterator;
}
if (allMissing) {
// no collection in this row: unless skipping is enabled, push the row once under its
// original key, passing only missing cells as the new values to createClone
if (!m_skipMissingValues) {
final DefaultRow newRow = createClone(row.getKey(), row, m_colIndices, m_removeCollectionCol, missingCells);
if (m_enableHilite) {
// create the hilite entry: the row maps onto itself
final Set<RowKey> keys = new HashSet<RowKey>(1);
keys.add(row.getKey());
hiliteMapping.put(row.getKey(), keys);
}
out.push(newRow);
}
continue;
}
// running suffix for the keys of the rows generated from this input row
long counter = 1;
// keys of the generated rows; only collected when hiliting is enabled
final Set<RowKey> keys;
if (m_enableHilite) {
keys = new HashSet<RowKey>();
} else {
keys = null;
}
// true if at least one iterator delivered an element in the current round
boolean continueLoop = false;
// NOT reset per round: stays true only while every column is a collection and none has
// delivered an element yet, i.e. all collections of this row are empty
boolean allEmpty = true;
do {
// reset the per-round flags
allMissing = true;
continueLoop = false;
final DataCell[] newCells = new DataCell[iterators.length];
for (int i = 0, length = iterators.length; i < length; i++) {
Iterator<DataCell> iterator = iterators[i];
DataCell newCell;
if (iterator != null && iterator.hasNext()) {
allEmpty = false;
continueLoop = true;
newCell = iterator.next();
} else {
// exhausted collection or non-collection column: fill up with a missing cell
if (iterator == null) {
allEmpty = false;
}
newCell = DataType.getMissingCell();
}
if (!newCell.isMissing()) {
allMissing = false;
}
newCells[i] = newCell;
}
// every iterator is exhausted and this is not the "all collections empty" case: done with this row
if (!allEmpty && !continueLoop) {
break;
}
// a round that produced only missing values is dropped if requested; 'continue' jumps to
// the while condition, so the loop goes on as long as elements were delivered
if (!allEmpty && allMissing && m_skipMissingValues) {
continue;
}
// reached with allEmpty == true when all collections are empty: exactly one row holding
// only missing values is written, independent of m_skipMissingValues
final RowKey oldKey = row.getKey();
final RowKey newKey = new RowKey(oldKey.getString() + "_" + counter++);
final DefaultRow newRow = createClone(newKey, row, m_colIndices, m_removeCollectionCol, newCells);
out.push(newRow);
if (keys != null) {
keys.add(newKey);
}
} while (continueLoop);
// only register a hilite entry if at least one row was generated
if (keys != null && !keys.isEmpty()) {
hiliteMapping.put(row.getKey(), keys);
}
}
if (m_enableHilite) {
m_trans.setMapper(new DefaultHiLiteMapper(hiliteMapping));
}
}
use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.
the class SubsetMatcherNodeModel method createSortedMatcher.
/**
 * Reads all collection cells of the given column and groups them into {@link SubsetMatcher}s, one matcher per
 * distinct first item of the sorted item set. The matchers are returned in their natural sort order.
 * <p>
 * Rows whose cell is not a {@link CollectionDataValue} (e.g. missing cells) and rows with an empty collection are
 * ignored. They still count towards the reported progress, so the progress reaches {@code rowCount / rowCount}
 * after the last row even if rows were skipped.
 *
 * @param exec the monitor used for cancellation checks and progress reporting
 * @param table the table that contains the subsets
 * @param colIdx the index of the collection column within {@code table}
 * @param comparator the comparator passed to the item set sorting and to each created matcher
 * @return the sorted matchers, an empty array if the table has no rows or no usable collection cell
 * @throws CanceledExecutionException if the execution has been canceled
 */
private SubsetMatcher[] createSortedMatcher(final ExecutionMonitor exec, final BufferedDataTable table, final int colIdx, final Comparator<DataCell> comparator) throws CanceledExecutionException {
    final long rowCount = table.size();
    if (rowCount < 1) {
        return new SubsetMatcher[0];
    }
    // first item of a sorted item set -> matcher that collects all item sets starting with it
    final Map<DataCell, SubsetMatcher> map = new HashMap<>();
    long counter = 0;
    for (final DataRow row : table) {
        // count every visited row, including the ones skipped below; otherwise the
        // progress stalls on skipped rows and never reaches 100%
        counter++;
        exec.checkCanceled();
        exec.setProgress(counter / (double) rowCount, "Processing subset " + counter + " of " + rowCount);
        final DataCell cell = row.getCell(colIdx);
        if (!(cell instanceof CollectionDataValue)) {
            // skip missing cells and non-collection cells
            continue;
        }
        final CollectionDataValue collectionCell = (CollectionDataValue) cell;
        if (collectionCell.size() <= 0) {
            // skip empty collections
            continue;
        }
        final DataCell[] itemSet = collectionCell2SortedArray(collectionCell, comparator);
        final DataCell rootItem = itemSet[0];
        SubsetMatcher matcher = map.get(rootItem);
        if (matcher == null) {
            matcher = new SubsetMatcher(rootItem, comparator);
            map.put(rootItem, matcher);
        }
        // the root item is represented by the matcher itself; hand over the items from index 1 on
        matcher.appendChildMatcher(itemSet, 1);
    }
    final ArrayList<SubsetMatcher> matchers = new ArrayList<>(map.values());
    Collections.sort(matchers);
    return matchers.toArray(new SubsetMatcher[0]);
}
use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.
the class StringCompareRowFilter method matches.
/**
 * {@inheritDoc}
 */
@Override
public boolean matches(final DataRow row, final long rowIndex) throws EndOfTableException, IncludeFromNowOn {
    assert getColIdx() >= 0;
    final DataCell cell = row.getCell(getColIdx());
    final boolean hit;
    if (cell.isMissing()) {
        // missing values never match the pattern
        hit = false;
    } else if (getDeepFiltering() && cell instanceof CollectionDataValue) {
        // look into the elements of the collection
        hit = performDeepFiltering((CollectionDataValue) cell);
    } else {
        hit = matches(cell);
    }
    // include mode keeps the hits, exclude mode keeps everything else
    return getInclude() == hit;
}
use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.
the class Collection2BitVectorCellFactory method getCell.
/**
 * {@inheritDoc}
 * <p>
 * Converts the collection cell of the configured column into a bit vector: for every non-missing element the bit
 * index registered for the element's string representation in {@code m_idxMap} is set. A missing cell is returned
 * if the input cell is missing, is not a {@link CollectionDataValue}, or contains an element without a registered
 * bit index (the latter two cases are reported via {@code printError}).
 * <p>
 * The set / not set bit statistics are based on the number of distinct bits that were actually set, so missing
 * elements and duplicate elements of the collection are not counted as additional set bits.
 */
@Override
public DataCell getCell(final DataRow row) {
    incrementNrOfRows();
    final DataCell cell = row.getCell(getColumnIndex());
    if (cell.isMissing()) {
        return DataType.getMissingCell();
    }
    if (!(cell instanceof CollectionDataValue)) {
        printError(LOGGER, row, "Incompatible type found");
        return DataType.getMissingCell();
    }
    final org.knime.core.data.vector.bitvector.BitVectorCellFactory<? extends DataCell> factory =
        getVectorType().getCellFactory(m_idxMap.size());
    final CollectionDataValue collCell = (CollectionDataValue) cell;
    // distinct bit positions set for this row; collCell.size() would also count
    // missing elements and duplicates and thereby distort the statistics
    final java.util.Set<Integer> setBitIndices = new java.util.HashSet<Integer>();
    for (final DataCell valCell : collCell) {
        if (valCell.isMissing()) {
            continue;
        }
        final String key = valCell.toString();
        final Integer bitIdx = m_idxMap.get(key);
        if (bitIdx == null) {
            printError(LOGGER, row, "No bit index found for cell " + key);
            return DataType.getMissingCell();
        }
        factory.set(bitIdx.intValue());
        setBitIndices.add(bitIdx);
    }
    final int nrOfSetBits = setBitIndices.size();
    m_nrOfSetBits += nrOfSetBits;
    m_nrOfNotSetBits += m_idxMap.size() - nrOfSetBits;
    return factory.createDataCell();
}
Aggregations