Search in sources :

Example 6 with BitVectorValue

use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.

the class AbstractTrainingRowBuilder method build.

/**
 * Turns a data row into a sparse training row.
 *
 * Only non-zero features are recorded: their positions go into the shared
 * index buffer and their values into the shared value buffer. Position 0 is
 * always the intercept with value 1. Nominal columns are one-hot encoded with
 * the first domain value left implicit, bit/byte vector columns contribute one
 * feature per vector position, and numeric columns contribute one feature.
 *
 * @param row the row to convert
 * @param id identifier handed through to {@code createTrainingRow}
 * @return the object produced by {@code createTrainingRow} for the trimmed
 *         index and value arrays
 * @throws IllegalStateException if a nominal cell is not part of the known
 *             domain values, or a cell has an unsupported type
 */
@Override
public T build(final DataRow row, final int id) {
    // slot 0 is reserved for the intercept, which is present in every row
    m_nonZeroIndices[0] = 0;
    m_nonZeroValues[0] = 1.0F;
    // number of buffer slots filled so far
    int filled = 1;
    // feature index at which the current column starts
    int offset = 1;
    for (int f = 0; f < m_featureCellIndices.size(); f++) {
        final Integer column = m_featureCellIndices.get(f);
        final DataCell cell = row.getCell(column);
        final DataType type = cell.getType();

        if (type.isCompatible(NominalValue.class)) {
            // one-hot encoding; domain position 0 produces no entry
            final List<DataCell> domain = m_nominalDomainValues.get(column);
            final int position = domain.indexOf(cell);
            if (position == -1) {
                throw new IllegalStateException("DataCell \"" + cell.toString() + "\" is not in the DataColumnDomain. Please apply a " + "Domain Calculator on the columns with nominal values.");
            }
            if (position > 0) {
                m_nonZeroIndices[filled] = offset + position - 1;
                m_nonZeroValues[filled] = 1.0F;
                filled++;
            }
            offset += domain.size() - 1;
            continue;
        }

        if (m_vectorLengths.containsKey(column)) {
            if (type.isCompatible(BitVectorValue.class)) {
                // every set bit becomes a feature with value 1
                final BitVectorValue bits = (BitVectorValue) cell;
                long pos = bits.nextSetBit(0L);
                while (pos >= 0) {
                    m_nonZeroIndices[filled] = (int) (offset + pos);
                    m_nonZeroValues[filled] = 1.0F;
                    filled++;
                    pos = bits.nextSetBit(pos + 1);
                }
            } else if (type.isCompatible(ByteVectorValue.class)) {
                // every position reported by nextCountIndex becomes a feature holding its count
                final ByteVectorValue counts = (ByteVectorValue) cell;
                long pos = counts.nextCountIndex(0L);
                while (pos >= 0) {
                    m_nonZeroIndices[filled] = (int) (offset + pos);
                    m_nonZeroValues[filled] = counts.get(pos);
                    filled++;
                    pos = counts.nextCountIndex(pos + 1);
                }
            } else {
                // NOTE: DoubleVectorValue cells and collections of DoubleValue are
                // intentionally not handled; support was disabled until those
                // types can be used with PMML.
                // should never be thrown because we check the compatibility in the constructor
                throw new IllegalStateException("DataCell \"" + cell.toString() + "\" is of an unknown vector/collections type.");
            }
            offset += m_vectorLengths.get(column);
            continue;
        }

        if (type.isCompatible(DoubleValue.class)) {
            // plain numeric column: a single feature, skipped when zero
            final double number = ((DoubleValue) cell).getDoubleValue();
            if (!MathUtils.equals(number, 0.0)) {
                m_nonZeroIndices[filled] = offset;
                m_nonZeroValues[filled] = (float) number;
                filled++;
            }
            offset++;
            continue;
        }

        // none of the supported kinds matched
        throw new IllegalStateException("The DataCell \"" + cell.toString() + "\" is of incompatible type \"" + type.toPrettyString() + "\".");
    }
    // hand out copies trimmed to the used part of the shared buffers
    final int[] indices = Arrays.copyOf(m_nonZeroIndices, filled);
    final float[] features = Arrays.copyOf(m_nonZeroValues, filled);
    return createTrainingRow(row, indices, features, id);
}
Also used : ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Example 7 with BitVectorValue

use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.

the class SubgroupMinerModel2 method preprocess.

/**
 * Gathers the non-missing bit vectors of the configured transaction column.
 *
 * Each collected vector gets a running transaction id (starting at 0) that is
 * mapped to its row key, and {@code maxBitsetLength} is raised to the longest
 * vector length seen.
 *
 * @param inData input table; it is cast to {@code BufferedDataTable} to obtain the row count
 * @param exec used for cancellation checks and progress reporting
 * @param tidRowKeyMapping receives the transaction id to row key mapping
 * @param maxBitsetLength updated with the maximum vector length encountered
 * @return the collected bit vectors, or an empty list if the column is not found
 * @throws CanceledExecutionException if the execution was canceled
 * @throws IllegalArgumentException if a vector is longer than {@code Integer.MAX_VALUE}
 */
private List<BitVectorValue> preprocess(final DataTable inData, final ExecutionMonitor exec, final Map<Integer, RowKey> tidRowKeyMapping, final AtomicInteger maxBitsetLength) throws CanceledExecutionException {
    final int totalNrRows = ((BufferedDataTable) inData).getRowCount();
    final List<BitVectorValue> collected = new ArrayList<BitVectorValue>();
    final int column = inData.getDataTableSpec().findColumnIndex(m_transactionColumn.getStringValue());
    if (column < 0) {
        // transaction column unknown: nothing to collect
        return collected;
    }
    // counts only rows that were actually collected; doubles as transaction id
    int tid = 0;
    for (final DataRow current : inData) {
        exec.checkCanceled();
        final DataCell cell = current.getCell(column);
        if (!cell.isMissing()) {
            final BitVectorValue vector = (BitVectorValue) cell;
            final long vectorLength = vector.length();
            if (vectorLength > Integer.MAX_VALUE) {
                throw new IllegalArgumentException("bit vector in row " + current.getKey().getString() + " is too long: " + vectorLength + ". Only bit vectors up to " + Integer.MAX_VALUE + " are supported by this node.");
            }
            final int longestSoFar = maxBitsetLength.get();
            maxBitsetLength.set(Math.max(longestSoFar, (int) vectorLength));
            collected.add(vector);
            tidRowKeyMapping.put(tid, current.getKey());
            tid++;
            exec.setProgress((double) tid / (double) totalNrRows, "preprocessing..." + tid);
        }
    }
    LOGGER.debug("max length: " + maxBitsetLength.get());
    return collected;
}
Also used : BufferedDataTable(org.knime.core.node.BufferedDataTable) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) DataRow(org.knime.core.data.DataRow)

Example 8 with BitVectorValue

use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.

the class ArrayApriori method findFrequentItemSets.

/**
 * Determines the frequent itemsets level by level. For each level the
 * transactions are passed down the prefix tree to the level currently being
 * built, where the items contained in the transaction are counted. Hence only
 * items reachable via an existing path, i.e. with frequent predecessors, can
 * be counted. After counting, children are created for the itemsets that may
 * become frequent on the next level (itemsets with one more item). The
 * process ends when no child was created or {@code maxDepth} levels were built.
 *
 * {@inheritDoc}
 */
@Override
public void findFrequentItemSets(final List<BitVectorValue> transactions, final double minSupport, final int maxDepth, final FrequentItemSet.Type type, final ExecutionMonitor exec) throws CanceledExecutionException {
    m_minSupport = minSupport;
    m_dbsize = transactions.size();
    findFrequentItems(transactions);
    m_root = new ArrayPrefixTreeNode(m_compressedLength, null, -1);
    m_builtLevel = 0;
    while (true) {
        // counting pass over all transactions for the current level
        m_transactionNr = 0;
        for (final BitVectorValue transaction : transactions) {
            exec.checkCanceled();
            // transactions without any set bit are ignored and not numbered
            if (transaction.cardinality() != 0) {
                count(transaction, m_root, 0, 0);
                m_transactionNr++;
            }
        }
        // extend the tree by the candidates for the next level
        m_childCreated = false;
        createChildren(m_root, 0, 0, exec);
        m_builtLevel++;
        exec.setProgress((1.0 - (1.0 / m_builtLevel)), "building level: " + m_builtLevel);
        if (!m_childCreated || m_builtLevel >= maxDepth) {
            break;
        }
    }
}
Also used : BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Example 9 with BitVectorValue

use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.

the class ExpandBitVectorNodeModel method createCellFactory.

/**
 * Creates a cell factory that expands the bit vector found at
 * {@code inputIndex} into one output cell per entry of {@code colNames}.
 *
 * Bit {@code i} of the vector is mapped to {@code VALUES[1]} when set and to
 * {@code VALUES[0]} otherwise. Output positions beyond the end of the vector,
 * and all positions when the input cell is not a {@link BitVectorValue}, are
 * filled with missing cells.
 *
 * {@inheritDoc}
 */
@Override
protected AbstractCellFactory createCellFactory(final String[] colNames, final DataColumnSpec[] outputColumns, final int inputIndex) {
    return new AbstractCellFactory(outputColumns) {

        @Override
        public DataCell[] getCells(final DataRow row) {
            DataCell[] vs = new DataCell[colNames.length];
            DataCell cell = row.getCell(inputIndex);
            if (cell instanceof BitVectorValue) {
                BitVectorValue bvv = (BitVectorValue) cell;
                // Compare as longs and narrow afterwards: casting bvv.length() to int
                // first can wrap to a negative value for very long vectors, which would
                // drive the missing-cell loop below to index -1.
                int length = (int) Math.min(vs.length, bvv.length());
                // positions covered by the vector
                for (int i = length; i-- > 0; ) {
                    vs[i] = VALUES[bvv.get(i) ? 1 : 0];
                }
                // positions past the end of the vector
                for (int i = vs.length; i-- > length; ) {
                    vs[i] = DataType.getMissingCell();
                }
            } else {
                // not a bit vector: every output cell is missing
                for (int i = 0; i < vs.length; i++) {
                    vs[i] = DataType.getMissingCell();
                }
            }
            return vs;
        }
    };
}
Also used : AbstractCellFactory(org.knime.core.data.container.AbstractCellFactory) DataCell(org.knime.core.data.DataCell) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) DataRow(org.knime.core.data.DataRow)

Example 10 with BitVectorValue

use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.

the class BitVectorIntersectionOperator method computeInternal.

/**
 * Folds the given cell into the running intersection. The first bit vector
 * seen is copied into a dense bit vector; every later one is AND-ed with the
 * current result. Cells that are not bit vectors are ignored.
 *
 * {@inheritDoc}
 */
@Override
protected boolean computeInternal(final DataCell cell) {
    if (!(cell instanceof BitVectorValue)) {
        return false;
    }
    final BitVectorValue bits = (BitVectorValue) cell;
    if (m_v != null) {
        // intersect with what has been accumulated so far
        m_v = DenseBitVectorCellFactory.and(m_v, bits);
        return false;
    }
    // first vector: copy its set bits into a dense vector of the same length
    final DenseBitVectorCellFactory copy = new DenseBitVectorCellFactory(bits.length());
    for (long pos = bits.nextSetBit(0); pos >= 0; pos = bits.nextSetBit(pos + 1)) {
        copy.set(pos);
    }
    m_v = copy.createDataCell();
    return false;
}
Also used : BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) DenseBitVectorCellFactory(org.knime.core.data.vector.bitvector.DenseBitVectorCellFactory)

Aggregations

BitVectorValue (org.knime.core.data.vector.bitvector.BitVectorValue): 26, DataCell (org.knime.core.data.DataCell): 14, ByteVectorValue (org.knime.core.data.vector.bytevector.ByteVectorValue): 7, ArrayList (java.util.ArrayList): 5, DataRow (org.knime.core.data.DataRow): 5, StringCell (org.knime.core.data.def.StringCell): 4, BufferedDataTable (org.knime.core.node.BufferedDataTable): 4, LinkedHashMap (java.util.LinkedHashMap): 3, DefaultRow (org.knime.core.data.def.DefaultRow): 3, IntCell (org.knime.core.data.def.IntCell): 3, DenseBitVectorCellFactory (org.knime.core.data.vector.bitvector.DenseBitVectorCellFactory): 3, BitSet (java.util.BitSet): 2, Date (java.util.Date): 2, HashMap (java.util.HashMap): 2, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 2, PredictorRecord (org.knime.base.node.mine.treeensemble.data.PredictorRecord): 2, DataType (org.knime.core.data.DataType): 2, DoubleValue (org.knime.core.data.DoubleValue): 2, RowKey (org.knime.core.data.RowKey): 2, CollectionDataValue (org.knime.core.data.collection.CollectionDataValue): 2