Search in sources :

Example 21 with BitVectorValue

use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.

the class RegressionTrainingRow method getValue.

/**
 * Reads the numeric entry at the {@code 0}-based {@code index} out of a vector- or list-like {@code cell}.
 * <ul>
 * <li>{@link BitVectorValue}: {@code 1} if the bit is set, {@code 0} otherwise; {@link Double#NaN} when
 * {@code index} is at or beyond the vector length.</li>
 * <li>{@link ByteVectorValue}: the stored value at {@code index}; {@link Double#NaN} when {@code index} is at or
 * beyond the vector length.</li>
 * <li>{@link ListDataValue}: the element's double value if the element is a {@link DoubleValue}; otherwise the
 * element is passed to {@code missingHandling} and {@link Double#NaN} is returned.</li>
 * </ul>
 *
 * @param cell A {@link DataCell}, probably a collection of numbers/bytes/bits.
 * @param index The index to select from the collection.
 * @param missingHandling How to handle missing values; only consulted for list elements that are not numeric.
 * @return The number at the specified position, or {@link Double#NaN} as described above.
 * @throws IllegalStateException if {@code cell} is none of the supported types, or if it is a list and
 *             {@code index} is at or beyond its size.
 * @since 3.1
 */
public static double getValue(final DataCell cell, final int index, final MissingHandling missingHandling) {
    if (cell instanceof BitVectorValue) {
        final BitVectorValue bits = (BitVectorValue) cell;
        if (index >= bits.length()) {
            // TODO NaN, or 0?
            return Double.NaN;
        }
        return bits.get(index) ? 1d : 0d;
    }
    if (cell instanceof ByteVectorValue) {
        final ByteVectorValue bytes = (ByteVectorValue) cell;
        // Out-of-range positions are reported as NaN (maybe some other value would fit better?).
        return index < bytes.length() ? bytes.get(index) : Double.NaN;
    }
    if (cell instanceof ListDataValue) {
        final ListDataValue list = (ListDataValue) cell;
        if (index < list.size()) {
            final DataCell element = list.get(index);
            if (!(element instanceof DoubleValue)) {
                // The result of isMissing is deliberately not used here.
                // NOTE(review): presumably the call is made for its side effect (e.g. failing on
                // missing values) - confirm against MissingHandling.
                missingHandling.isMissing(element);
                return Double.NaN;
            }
            return ((DoubleValue) element).getDoubleValue();
        }
        // A list that is too short falls through to the exception below.
    }
    throw new IllegalStateException("Not a missing, nor a vector value: " + cell);
}
Also used : ListDataValue(org.knime.core.data.collection.ListDataValue) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Example 22 with BitVectorValue

use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.

the class SubgroupMinerModel2 method preprocessCollCells.

/**
 * Preprocesses the transaction column when it holds collection cells. In a first pass every distinct collection
 * element is assigned a consecutive item id (in order of first appearance) and appended to {@code nameMapping}.
 * In a second pass one sparse bit vector per non-missing row is built, with the bits of the row's items set.
 * Rows whose transaction cell is missing are skipped in both passes.
 *
 * @param inData the data table.
 * @param exec the execution monitor used for cancellation checks and progress reporting.
 * @param nameMapping receives the distinct collection elements; the list position is the item id.
 * @param tidRowKeyMapping receives, for each created bit vector (numbered from 0), the key of its source row.
 * @param maxBitsetLength is set to the number of entries in {@code nameMapping} once all rows are processed.
 * @return the list of bit vectors, one per row with a non-missing transaction cell.
 * @throws CanceledExecutionException if the execution was canceled.
 */
private List<BitVectorValue> preprocessCollCells(final BufferedDataTable inData, final ExecutionMonitor exec, final List<DataCell> nameMapping, final Map<Integer, RowKey> tidRowKeyMapping, final AtomicInteger maxBitsetLength) throws CanceledExecutionException {
    final int transIndex = inData.getDataTableSpec().findColumnIndex(m_transactionColumn.getStringValue());

    // first pass: number the distinct items
    final Map<DataCell, Integer> itemIds = new HashMap<DataCell, Integer>();
    for (final DataRow row : inData) {
        final DataCell cell = row.getCell(transIndex);
        if (cell.isMissing()) {
            continue;
        }
        for (final DataCell item : (CollectionDataValue) cell) {
            exec.checkCanceled();
            if (itemIds.containsKey(item)) {
                continue;
            }
            itemIds.put(item, itemIds.size());
            nameMapping.add(item);
        }
    }

    // second pass: create the bitvectors
    final int totalRows = inData.getRowCount();
    final List<BitVectorValue> bitSets = new ArrayList<BitVectorValue>();
    int processed = 0;
    for (final DataRow row : inData) {
        exec.checkCanceled();
        final DataCell cell = row.getCell(transIndex);
        if (cell.isMissing()) {
            continue;
        }
        final CollectionDataValue items = (CollectionDataValue) cell;
        final SparseBitVector bits = new SparseBitVector(nameMapping.size());
        for (final DataCell item : items) {
            exec.checkCanceled();
            final Integer id = itemIds.get(item);
            assert (id != null);
            bits.set(id.intValue(), true);
        }
        // NOTE(review): this guard runs after the bits were already set and can only fire if size()
        // is able to exceed Integer.MAX_VALUE - confirm whether it is still needed.
        if (items.size() > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("bit vector in row " + row.getKey().getString() + " is too long: " + items.size() + ". Only bit vectors up to " + Integer.MAX_VALUE + " are supported by this node.");
        }
        bitSets.add(new SparseBitVectorCellFactory(bits).createDataCell());
        tidRowKeyMapping.put(processed++, row.getKey());
        exec.setProgress((double) processed / (double) totalRows, "preprocessing..." + processed);
    }

    maxBitsetLength.set(nameMapping.size());
    LOGGER.debug("max length: " + maxBitsetLength.get());
    return bitSets;
}
Also used : SparseBitVector(org.knime.core.data.vector.bitvector.SparseBitVector) HashMap(java.util.HashMap) SparseBitVectorCellFactory(org.knime.core.data.vector.bitvector.SparseBitVectorCellFactory) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DataCell(org.knime.core.data.DataCell) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Example 23 with BitVectorValue

use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.

the class SubgroupMinerModel2 method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Converts the selected transaction column of the first input table into a list of bit vectors, runs the
 * configured apriori implementation on them and returns the resulting item set table. Bit vector columns take
 * their item names from the column's element names; collection columns derive them during preprocessing.
 * </p>
 *
 * @throws IOException if the selected column is neither bit vector nor collection compatible.
 * @throws OutOfMemoryError with a hint to increase the support threshold if the search runs out of memory; the
 *             original error is attached as cause.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable input = inData[0];
    DataTableSpec spec = input.getDataTableSpec();
    // Each of the two phases (preprocessing, mining) is given a 0.5 share of the progress.
    ExecutionMonitor exec1 = exec.createSubProgress(0.5);
    ExecutionMonitor exec2 = exec.createSubProgress(0.5);
    Map<Integer, RowKey> tidRowKeyMapping = new HashMap<Integer, RowKey>();
    LinkedList<DataCell> nameMapping = new LinkedList<DataCell>();
    List<BitVectorValue> transactions;
    AtomicInteger maxBitsetLength = new AtomicInteger(0);
    final String transactionColumn = m_transactionColumn.getStringValue();
    if (spec.getColumnSpec(transactionColumn).getType().isCompatible(BitVectorValue.class)) {
        transactions = preprocess(input, exec1, tidRowKeyMapping, maxBitsetLength);
        List<String> columnstrings = spec.getColumnSpec(transactionColumn).getElementNames();
        for (String s : columnstrings) {
            nameMapping.add(new StringCell(s));
        }
        // fix #2505: use maximum bitset length
        maxBitsetLength.set(Math.max(maxBitsetLength.get(), nameMapping.size()));
    } else if (spec.getColumnSpec(transactionColumn).getType().isCompatible(CollectionDataValue.class)) {
        // the name mapping is taken care of in the preprocessing
        transactions = preprocessCollCells(input, exec1, nameMapping, tidRowKeyMapping, maxBitsetLength);
    } else {
        // neither a bit vector nor a collection column
        throw new IOException("Selected column is not a possible transaction");
    }
    AprioriAlgorithm apriori = AprioriAlgorithmFactory.getAprioriAlgorithm(AprioriAlgorithmFactory.AlgorithmDataStructure.valueOf(m_underlyingStruct.getStringValue()), maxBitsetLength.get(), input.getRowCount());
    LOGGER.debug("support: " + m_minSupport);
    LOGGER.debug(m_minSupport + " start apriori: " + new Date());
    try {
        apriori.findFrequentItemSets(transactions, m_minSupport.getDoubleValue(), m_maxItemSetLength.getIntValue(), FrequentItemSet.Type.valueOf(m_itemSetType.getStringValue()), exec2);
    } catch (OutOfMemoryError oome) {
        // Rethrow with a user-oriented hint, but keep the original error (and its stack trace) as cause.
        final OutOfMemoryError hint = new OutOfMemoryError("Execution resulted in an out of memory error, " + "please increase the support threshold.");
        hint.initCause(oome);
        throw hint;
    }
    LOGGER.debug("ended apriori: " + new Date());
    BufferedDataTable itemSetTable = createOutputTable(spec, exec, apriori, nameMapping);
    return new BufferedDataTable[] { itemSetTable };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) AprioriAlgorithm(org.knime.base.node.mine.subgroupminer.apriori.AprioriAlgorithm) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) IOException(java.io.IOException) LinkedList(java.util.LinkedList) Date(java.util.Date) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StringCell(org.knime.core.data.def.StringCell) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Example 24 with BitVectorValue

use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.

the class ArrayApriori method findFrequentItems.

/**
 * Counts, for every item position, the number of transactions in which its bit is set and keeps the positions
 * whose relative frequency (count divided by the database size) reaches the minimum support. Two lookup tables
 * are filled: {@code m_mapping} translates an original item position into its position among the frequent
 * items ({@code -1} for infrequent ones), {@code m_backwardMapping} translates back. Thus, the algorithm can
 * work on the usually much shorter array of frequent items only. Finally the raw counts are handed to
 * {@code filterAlwaysFrequentItems}.
 *
 * @param transactions the database as bitsets
 */
private void findFrequentItems(final List<BitVectorValue> transactions) {
    final int slots = m_bitSetLength + 1;
    final int[] counts = new int[slots];
    m_mapping = new int[slots];

    // Count every set bit; simply incrementing is probably faster than first checking
    // whether the item might be frequent.
    for (final BitVectorValue transaction : transactions) {
        int bit = (int) transaction.nextSetBit(0);
        while (bit >= 0) {
            counts[bit]++;
            bit = (int) transaction.nextSetBit(bit + 1);
        }
    }

    // Frequent items get consecutive compressed positions in ascending item order.
    final List<Integer> frequent = new ArrayList<Integer>();
    for (int item = 0; item < counts.length; item++) {
        final double support = (double) counts[item] / (double) m_dbsize;
        if (support >= m_minSupport) {
            m_mapping[item] = frequent.size();
            frequent.add(item);
        } else {
            m_mapping[item] = -1;
        }
    }

    m_compressedLength = frequent.size();
    m_backwardMapping = new int[m_compressedLength];
    int pos = 0;
    for (final int item : frequent) {
        m_backwardMapping[pos++] = item;
    }
    filterAlwaysFrequentItems(counts);
}
Also used : ArrayList(java.util.ArrayList) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Example 25 with BitVectorValue

use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.

the class TIDApriori method findFrequentItems.

/**
 * Collects, for every item that occurs in at least one transaction, the ids of the transactions it appears in,
 * then drops all items whose support is below the minimum support and sorts the remaining ones.
 *
 * @param transactions the database containing the transactions as BitSets
 * @param exec the execution monitor
 * @throws CanceledExecutionException if user cancels execution
 */
public void findFrequentItems(final List<BitVectorValue> transactions, final ExecutionMonitor exec) throws CanceledExecutionException {
    m_frequentItems = new ArrayList<TIDItem>();
    int transactionNr = 0;
    for (BitVectorValue transaction : transactions) {
        double progress = transactionNr / (double) m_dbsize;
        exec.setProgress(progress, "detecting frequent items. Transaction nr: " + transactionNr);
        exec.checkCanceled();
        for (int item = (int) transaction.nextSetBit(0); item >= 0; item = (int) transaction.nextSetBit(item + 1)) {
            // One probe object and one list scan per set bit: the probe is either stored as the
            // new entry or only used to locate the existing one.
            final TIDItem probe = new TIDItem(item);
            final int pos = m_frequentItems.indexOf(probe);
            if (pos < 0) {
                // first occurrence of this item
                probe.addTID(transactionNr);
                m_frequentItems.add(probe);
            } else {
                // already known: add this transaction id to it
                m_frequentItems.get(pos).addTID(transactionNr);
            }
        }
        transactionNr++;
    }
    // Remove the items that did not reach the minimum support. Walking backwards keeps the
    // indices of the not yet visited entries stable while removing.
    for (int idx = m_frequentItems.size() - 1; idx >= 0; idx--) {
        if (m_frequentItems.get(idx).getSupport() < m_minSupport) {
            m_frequentItems.remove(idx);
        }
    }
    Collections.sort(m_frequentItems);
}
Also used : ArrayList(java.util.ArrayList) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Aggregations

BitVectorValue (org.knime.core.data.vector.bitvector.BitVectorValue)26 DataCell (org.knime.core.data.DataCell)14 ByteVectorValue (org.knime.core.data.vector.bytevector.ByteVectorValue)7 ArrayList (java.util.ArrayList)5 DataRow (org.knime.core.data.DataRow)5 StringCell (org.knime.core.data.def.StringCell)4 BufferedDataTable (org.knime.core.node.BufferedDataTable)4 LinkedHashMap (java.util.LinkedHashMap)3 DefaultRow (org.knime.core.data.def.DefaultRow)3 IntCell (org.knime.core.data.def.IntCell)3 DenseBitVectorCellFactory (org.knime.core.data.vector.bitvector.DenseBitVectorCellFactory)3 BitSet (java.util.BitSet)2 Date (java.util.Date)2 HashMap (java.util.HashMap)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 PredictorRecord (org.knime.base.node.mine.treeensemble.data.PredictorRecord)2 DataType (org.knime.core.data.DataType)2 DoubleValue (org.knime.core.data.DoubleValue)2 RowKey (org.knime.core.data.RowKey)2 CollectionDataValue (org.knime.core.data.collection.CollectionDataValue)2