use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.
the class RegressionTrainingRow method getValue.
/**
 * Reads a single numeric entry out of a vector- or list-like {@code cell}.
 * <ul>
 * <li>{@link BitVectorValue}: {@code 1.0} if the bit at {@code index} is set, {@code 0.0} otherwise.</li>
 * <li>{@link ByteVectorValue}: the value stored at {@code index}.</li>
 * <li>{@link ListDataValue}: the {@code double} value of the element at {@code index}, provided that element
 * is a {@link DoubleValue}.</li>
 * </ul>
 * For bit and byte vectors an {@code index} at or beyond the vector length yields {@link Double#NaN}. A list
 * element that is not a {@link DoubleValue} is passed to {@code missingHandling} and {@link Double#NaN} is
 * returned.
 *
 * @param cell A {@link DataCell}, expected to be a bit vector, byte vector or list.
 * @param index The {@code 0}-based position to read.
 * @param missingHandling Consulted for list elements that are not {@link DoubleValue}s.
 * @return The number at the specified position, or {@link Double#NaN} as described above.
 * @throws IllegalStateException if {@code cell} is none of the supported types, or if it is a list that has
 *             no element at {@code index}.
 * @since 3.1
 */
public static double getValue(final DataCell cell, final int index, final MissingHandling missingHandling) {
    if (cell instanceof BitVectorValue) {
        final BitVectorValue bits = (BitVectorValue) cell;
        if (index >= bits.length()) {
            // Position beyond the end of the bit vector: no value available.
            return Double.NaN;
        }
        return bits.get(index) ? 1d : 0d;
    }
    if (cell instanceof ByteVectorValue) {
        final ByteVectorValue bytes = (ByteVectorValue) cell;
        if (index >= bytes.length()) {
            // Position beyond the end of the byte vector: no value available.
            return Double.NaN;
        }
        return bytes.get(index);
    }
    if (cell instanceof ListDataValue) {
        final ListDataValue list = (ListDataValue) cell;
        if (index < list.size()) {
            final DataCell element = list.get(index);
            if (!(element instanceof DoubleValue)) {
                // NOTE(review): the result of isMissing is not used; presumably the call is made for a
                // side effect (e.g. failing on missing cells) - confirm against MissingHandling.
                missingHandling.isMissing(element);
                return Double.NaN;
            }
            return ((DoubleValue) element).getDoubleValue();
        }
        // A list that is too short falls through to the exception below.
    }
    throw new IllegalStateException("Not a missing, nor a vector value: " + cell);
}
use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.
the class SubgroupMinerModel2 method preprocessCollCells.
/**
 * Preprocesses the transaction column when it holds collection cells. In a first pass over the table
 * every distinct collection element is assigned a consecutive item id (in order of first appearance) and
 * appended to {@code nameMapping}. In a second pass one sparse bit vector is created per non-missing
 * row, with the bits of the row's items set. Rows whose transaction cell is missing are skipped in both
 * passes.
 *
 * @param inData the data table.
 * @param exec the execution monitor, used for cancellation checks and progress reporting.
 * @param nameMapping receives each newly seen collection element.
 * @param tidRowKeyMapping receives, for each created bit vector, its running number mapped to the key of
 *            the row it was created from.
 * @param maxBitsetLength is set to the size of {@code nameMapping} once all rows are processed.
 * @return the list of bit vectors, one per non-missing transaction
 * @throws CanceledExecutionException if the execution was canceled
 */
private List<BitVectorValue> preprocessCollCells(final BufferedDataTable inData, final ExecutionMonitor exec, final List<DataCell> nameMapping, final Map<Integer, RowKey> tidRowKeyMapping, final AtomicInteger maxBitsetLength) throws CanceledExecutionException {
    final Map<DataCell, Integer> itemIds = new HashMap<DataCell, Integer>();
    final int transIndex = inData.getDataTableSpec().findColumnIndex(m_transactionColumn.getStringValue());

    // First pass: collect the distinct items and give each one an id.
    for (final DataRow row : inData) {
        final DataCell transactionCell = row.getCell(transIndex);
        if (transactionCell.isMissing()) {
            continue;
        }
        final CollectionDataValue items = (CollectionDataValue) transactionCell;
        for (final DataCell item : items) {
            exec.checkCanceled();
            if (!itemIds.containsKey(item)) {
                itemIds.put(item, itemIds.size());
                nameMapping.add(item);
            }
        }
    }

    // Second pass: translate every non-missing transaction into a bit vector.
    int processed = 0;
    final int totalNrRows = inData.getRowCount();
    final List<BitVectorValue> result = new ArrayList<BitVectorValue>();
    for (final DataRow row : inData) {
        exec.checkCanceled();
        final DataCell transactionCell = row.getCell(transIndex);
        if (!transactionCell.isMissing()) {
            final CollectionDataValue items = (CollectionDataValue) transactionCell;
            final SparseBitVector bits = new SparseBitVector(nameMapping.size());
            for (final DataCell item : items) {
                exec.checkCanceled();
                final Integer id = itemIds.get(item);
                assert (id != null);
                bits.set(id.intValue(), true);
            }
            // NOTE(review): if size() returns an int this condition can never hold - confirm.
            if (items.size() > Integer.MAX_VALUE) {
                throw new IllegalArgumentException("bit vector in row " + row.getKey().getString() + " is too long: " + items.size() + ". Only bit vectors up to " + Integer.MAX_VALUE + " are supported by this node.");
            }
            result.add(new SparseBitVectorCellFactory(bits).createDataCell());
            tidRowKeyMapping.put(processed, row.getKey());
            processed++;
            exec.setProgress((double) processed / (double) totalNrRows, "preprocessing..." + processed);
        }
    }
    maxBitsetLength.set(nameMapping.size());
    LOGGER.debug("max length: " + maxBitsetLength.get());
    return result;
}
use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.
the class SubgroupMinerModel2 method execute.
/**
 * {@inheritDoc}
 *
 * Converts the selected transaction column of the first input table into bit vectors (either directly
 * from a bit vector column or from a collection column), runs the configured apriori algorithm on them
 * and returns the resulting item set table as the single output.
 *
 * @throws IOException if the selected column is neither bit vector nor collection compatible
 * @throws OutOfMemoryError if the apriori run exhausts the heap; the original error is attached as cause
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable input = inData[0];
    DataTableSpec spec = input.getDataTableSpec();
    // Half of the progress is spent on preprocessing, the other half on the apriori run.
    ExecutionMonitor exec1 = exec.createSubProgress(0.5);
    ExecutionMonitor exec2 = exec.createSubProgress(0.5);
    Map<Integer, RowKey> tidRowKeyMapping = new HashMap<Integer, RowKey>();
    LinkedList<DataCell> nameMapping = new LinkedList<DataCell>();
    List<BitVectorValue> transactions;
    AtomicInteger maxBitsetLength = new AtomicInteger(0);
    final String transactionColumn = m_transactionColumn.getStringValue();
    if (spec.getColumnSpec(transactionColumn).getType().isCompatible(BitVectorValue.class)) {
        transactions = preprocess(input, exec1, tidRowKeyMapping, maxBitsetLength);
        // Item names are taken from the element names of the bit vector column.
        List<String> columnstrings = spec.getColumnSpec(transactionColumn).getElementNames();
        for (String s : columnstrings) {
            nameMapping.add(new StringCell(s));
        }
        // fix #2505: use maximum bitset length
        maxBitsetLength.set(Math.max(maxBitsetLength.get(), nameMapping.size()));
    } else if (spec.getColumnSpec(transactionColumn).getType().isCompatible(CollectionDataValue.class)) {
        // The name mapping is filled as part of the preprocessing.
        transactions = preprocessCollCells(input, exec1, nameMapping, tidRowKeyMapping, maxBitsetLength);
    } else {
        // Neither a bit vector nor a collection column: nothing we can mine.
        throw new IOException("Selected column is not a possible transaction");
    }
    AprioriAlgorithm apriori = AprioriAlgorithmFactory.getAprioriAlgorithm(AprioriAlgorithmFactory.AlgorithmDataStructure.valueOf(m_underlyingStruct.getStringValue()), maxBitsetLength.get(), input.getRowCount());
    LOGGER.debug("support: " + m_minSupport);
    LOGGER.debug(m_minSupport + " start apriori: " + new Date());
    try {
        apriori.findFrequentItemSets(transactions, m_minSupport.getDoubleValue(), m_maxItemSetLength.getIntValue(), FrequentItemSet.Type.valueOf(m_itemSetType.getStringValue()), exec2);
    } catch (OutOfMemoryError oome) {
        // Re-throw with a user-oriented hint, but keep the original error (and its stack trace) as cause.
        final OutOfMemoryError withHint = new OutOfMemoryError("Execution resulted in an out of memory error, " + "please increase the support threshold.");
        withHint.initCause(oome);
        throw withHint;
    }
    LOGGER.debug("ended apriori: " + new Date());
    BufferedDataTable itemSetTable = createOutputTable(spec, exec, apriori, nameMapping);
    return new BufferedDataTable[] { itemSetTable };
}
use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.
the class ArrayApriori method findFrequentItems.
/**
 * Determines which single items are frequent and builds the index translation tables for them.
 * Every set bit of every transaction is counted per item position. An item is frequent if its count
 * divided by the database size reaches the minimum support. Frequent items are numbered consecutively;
 * {@code m_mapping} translates an original item position to that number ({@code -1} for infrequent
 * items) and {@code m_backwardMapping} translates the number back to the original position. Finally the
 * raw counts are handed to {@code filterAlwaysFrequentItems}.
 *
 * @param transactions the database as bitsets
 */
private void findFrequentItems(final List<BitVectorValue> transactions) {
    final int[] counts = new int[m_bitSetLength + 1];
    m_mapping = new int[m_bitSetLength + 1];

    // Count the occurrences of every item; counting unconditionally is cheaper than
    // testing first whether the item can still become frequent.
    for (final BitVectorValue transaction : transactions) {
        int bit = (int) transaction.nextSetBit(0);
        while (bit >= 0) {
            counts[bit]++;
            bit = (int) transaction.nextSetBit(bit + 1);
        }
    }

    // Assign consecutive positions to the frequent items, mark all others with -1.
    final int[] frequentPositions = new int[counts.length];
    int nextSlot = 0;
    for (int item = 0; item < counts.length; item++) {
        final boolean frequent = ((double) counts[item] / (double) m_dbsize) >= m_minSupport;
        if (!frequent) {
            m_mapping[item] = -1;
            continue;
        }
        frequentPositions[nextSlot] = item;
        m_mapping[item] = nextSlot;
        nextSlot++;
    }

    m_compressedLength = nextSlot;
    m_backwardMapping = new int[m_compressedLength];
    System.arraycopy(frequentPositions, 0, m_backwardMapping, 0, m_compressedLength);
    filterAlwaysFrequentItems(counts);
}
use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.
the class TIDApriori method findFrequentItems.
/**
 * Identifies the items that occur in a sufficient number of transactions, that is with at least the
 * minimum support, and stores them together with the ids of the transactions they appear in. The
 * resulting list of frequent items is sorted according to the natural ordering of {@code TIDItem}.
 *
 * @param transactions the database containing the transactions as BitSets
 * @param exec the execution monitor
 * @throws CanceledExecutionException if user cancels execution
 */
public void findFrequentItems(final List<BitVectorValue> transactions, final ExecutionMonitor exec) throws CanceledExecutionException {
    m_frequentItems = new ArrayList<TIDItem>();
    // Lookup of the already seen items by item id, so that each set bit is handled in constant time
    // instead of scanning the candidate list. Fully qualified to not depend on additional imports.
    // Assumes TIDItem equality is defined by the item id, as the former contains/equals lookups implied.
    final java.util.Map<Integer, TIDItem> seenById = new java.util.HashMap<Integer, TIDItem>();
    int transactionNr = 0;
    for (BitVectorValue transaction : transactions) {
        double progress = transactionNr / (double) m_dbsize;
        exec.setProgress(progress, "detecting frequent items. Transaction nr: " + transactionNr);
        exec.checkCanceled();
        for (int item = (int) transaction.nextSetBit(0); item >= 0; item = (int) transaction.nextSetBit(item + 1)) {
            TIDItem tidItem = seenById.get(item);
            if (tidItem == null) {
                // First occurrence of this item: register it in first-seen order.
                tidItem = new TIDItem(item);
                seenById.put(item, tidItem);
                m_frequentItems.add(tidItem);
            }
            tidItem.addTID(transactionNr);
        }
        transactionNr++;
    }
    // Keep only the candidates that reach the minimum support, preserving their relative order.
    final List<TIDItem> candidates = m_frequentItems;
    m_frequentItems = new ArrayList<TIDItem>(candidates.size());
    for (TIDItem candidate : candidates) {
        if (!(candidate.getSupport() < m_minSupport)) {
            m_frequentItems.add(candidate);
        }
    }
    Collections.sort(m_frequentItems);
}
Aggregations