Search in sources :

Example 1 with SparseBitVector

Use of org.knime.core.data.vector.bitvector.SparseBitVector in project knime-core by knime.

From the class SubgroupMinerModel2, method preprocessCollCells:

/**
 * Preprocesses the cells of the selected transaction column when that column
 * holds collections. The table is scanned twice: the first pass assigns a
 * consecutive item id to every distinct collection element, the second pass
 * creates one bit vector per row in which the bits of the contained items
 * are set. Rows whose transaction cell is missing are skipped in both passes.
 *
 * @param inData the data table to read the transactions from
 * @param exec the execution monitor used for cancellation checks and
 *            progress reporting
 * @param nameMapping receives every distinct collection element, appended in
 *            order of first occurrence. NOTE(review): the item ids start at 0
 *            while the bit vector length is taken from this list's size, so
 *            the list is presumably expected to be empty on entry - confirm
 *            against the caller
 * @param tidRowKeyMapping receives, for every processed (non-missing) row,
 *            the mapping from its running transaction index to its row key
 * @param maxBitsetLength is set to the size of <code>nameMapping</code>
 *            after preprocessing, i.e. the length used for every bit vector
 * @return the list of bit vectors, one per row with a non-missing
 *         transaction cell, in table order
 * @throws CanceledExecutionException propagated from
 *             <code>exec.checkCanceled()</code>
 */
private List<BitVectorValue> preprocessCollCells(final BufferedDataTable inData, final ExecutionMonitor exec, final List<DataCell> nameMapping, final Map<Integer, RowKey> tidRowKeyMapping, final AtomicInteger maxBitsetLength) throws CanceledExecutionException {
    final int transIndex = inData.getDataTableSpec().findColumnIndex(m_transactionColumn.getStringValue());

    // first pass: give every distinct item the next free id and remember
    // the item itself so that an id can be translated back later on
    final Map<DataCell, Integer> cell2ItemMap = new HashMap<DataCell, Integer>();
    for (final DataRow row : inData) {
        final DataCell cell = row.getCell(transIndex);
        if (cell.isMissing()) {
            continue;
        }
        for (final DataCell valCell : (CollectionDataValue) cell) {
            exec.checkCanceled();
            if (!cell2ItemMap.containsKey(valCell)) {
                cell2ItemMap.put(valCell, cell2ItemMap.size());
                nameMapping.add(valCell);
            }
        }
    }

    // second pass: create the bitvectors; all of them share the same length
    // because the name mapping is no longer modified from here on
    final int bitsetLength = nameMapping.size();
    final int totalNrRows = inData.getRowCount();
    final List<BitVectorValue> bitSets = new ArrayList<BitVectorValue>();
    int nrOfRows = 0;
    for (final DataRow row : inData) {
        exec.checkCanceled();
        final DataCell cell = row.getCell(transIndex);
        if (cell.isMissing()) {
            continue;
        }
        final SparseBitVector bitvec = new SparseBitVector(bitsetLength);
        for (final DataCell valCell : (CollectionDataValue) cell) {
            exec.checkCanceled();
            final Integer itemID = cell2ItemMap.get(valCell);
            // every item was registered during the first pass
            assert (itemID != null);
            bitvec.set(itemID.intValue(), true);
        }
        // No length check is needed here: the collection size is an int and
        // therefore can never exceed Integer.MAX_VALUE.
        bitSets.add(new SparseBitVectorCellFactory(bitvec).createDataCell());
        tidRowKeyMapping.put(nrOfRows, row.getKey());
        nrOfRows++;
        exec.setProgress((double) nrOfRows / (double) totalNrRows, "preprocessing..." + nrOfRows);
    }
    maxBitsetLength.set(bitsetLength);
    LOGGER.debug("max length: " + maxBitsetLength.get());
    return bitSets;
}
Also used : SparseBitVector(org.knime.core.data.vector.bitvector.SparseBitVector) HashMap(java.util.HashMap) SparseBitVectorCellFactory(org.knime.core.data.vector.bitvector.SparseBitVectorCellFactory) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DataCell(org.knime.core.data.DataCell) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Aggregations

ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 DataCell (org.knime.core.data.DataCell)1 DataRow (org.knime.core.data.DataRow)1 CollectionDataValue (org.knime.core.data.collection.CollectionDataValue)1 BitVectorValue (org.knime.core.data.vector.bitvector.BitVectorValue)1 SparseBitVector (org.knime.core.data.vector.bitvector.SparseBitVector)1 SparseBitVectorCellFactory (org.knime.core.data.vector.bitvector.SparseBitVectorCellFactory)1