Use of org.knime.core.data.vector.bitvector.SparseBitVector in project knime-core by knime.
The class SubgroupMinerModel2, method preprocessCollCells.
/**
 * Preprocesses the input table when the selected transaction column holds
 * collection cells. In a first pass every distinct collection element is
 * assigned a consecutive item id; in a second pass one sparse bit vector per
 * non-missing row is created, with the bits of the contained items set.
 * Rows whose transaction cell is missing are skipped in both passes.
 *
 * @param inData the data table to read the transactions from
 * @param exec the execution monitor used for cancellation checks and
 *            progress reporting
 * @param nameMapping receives every distinct item cell in order of first
 *            occurrence; its size after the first pass determines the
 *            length of the created bit vectors (NOTE(review): item ids
 *            only match list positions if this list is empty on entry -
 *            confirm against the caller)
 * @param tidRowKeyMapping receives, for each created bit vector, a mapping
 *            from its position in the returned list to the key of the row
 *            it was created from
 * @param maxBitsetLength is set to the size of {@code nameMapping} after
 *            processing
 * @return the list of bit vectors, one per row with a non-missing
 *         transaction cell
 * @throws CanceledExecutionException if the execution monitor signals
 *             cancellation
 */
private List<BitVectorValue> preprocessCollCells(final BufferedDataTable inData, final ExecutionMonitor exec, final List<DataCell> nameMapping, final Map<Integer, RowKey> tidRowKeyMapping, final AtomicInteger maxBitsetLength) throws CanceledExecutionException {
    final Map<DataCell, Integer> cell2ItemMap = new HashMap<DataCell, Integer>();
    // NOTE(review): the index is used unchecked; presumably the column's
    // existence is validated before this method is called - confirm.
    final int transIndex = inData.getDataTableSpec().findColumnIndex(m_transactionColumn.getStringValue());

    // first pass: collect all distinct items and assign consecutive ids
    for (final DataRow row : inData) {
        // check per row as well, so that rows with missing or empty
        // collections do not prevent cancellation
        exec.checkCanceled();
        final DataCell cell = row.getCell(transIndex);
        if (cell.isMissing()) {
            continue;
        }
        final CollectionDataValue colCell = (CollectionDataValue) cell;
        for (final DataCell valCell : colCell) {
            exec.checkCanceled();
            if (!cell2ItemMap.containsKey(valCell)) {
                cell2ItemMap.put(valCell, cell2ItemMap.size());
                nameMapping.add(valCell);
            }
        }
    }

    // second pass: create one bit vector per non-missing transaction
    int nrOfRows = 0;
    final int totalNrRows = inData.getRowCount();
    final List<BitVectorValue> bitSets = new ArrayList<BitVectorValue>();
    for (final DataRow row : inData) {
        exec.checkCanceled();
        final DataCell dc = row.getCell(transIndex);
        if (dc.isMissing()) {
            continue;
        }
        final CollectionDataValue currCell = (CollectionDataValue) dc;
        final SparseBitVector bitvec = new SparseBitVector(nameMapping.size());
        for (final DataCell valCell : currCell) {
            exec.checkCanceled();
            final Integer itemID = cell2ItemMap.get(valCell);
            // every item was registered in the first pass
            assert (itemID != null);
            bitvec.set(itemID.intValue(), true);
        }
        // No explicit length check is needed here: the collection size is an
        // int and therefore can never exceed Integer.MAX_VALUE.
        bitSets.add(new SparseBitVectorCellFactory(bitvec).createDataCell());
        tidRowKeyMapping.put(nrOfRows, row.getKey());
        nrOfRows++;
        // progress counts only rows that produced a bit vector
        exec.setProgress((double) nrOfRows / (double) totalNrRows, "preprocessing..." + nrOfRows);
    }
    maxBitsetLength.set(nameMapping.size());
    LOGGER.debug("max length: " + maxBitsetLength.get());
    return bitSets;
}
Aggregations