Use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.
Class AbstractTrainingRowBuilder, method build.
/**
 * Converts one data row into a sparse training row.
 *
 * <p>The feature cells listed in {@code m_featureCellIndices} are visited in order and every non-zero
 * feature is written into the scratch arrays {@code m_nonZeroIndices} / {@code m_nonZeroValues}:
 * <ul>
 * <li>slot 0 always holds the intercept (index 0, value 1),</li>
 * <li>a nominal cell is one-hot encoded against its domain values; the first domain value produces no
 * entry and the cell occupies {@code domain size - 1} feature positions,</li>
 * <li>a bit or byte vector cell contributes one entry per set bit / non-zero count and occupies the
 * number of positions recorded in {@code m_vectorLengths},</li>
 * <li>a numerical cell contributes one entry if its value is not zero and occupies one position.</li>
 * </ul>
 * The filled prefix of both scratch arrays is copied and handed to {@code createTrainingRow}.
 *
 * @param row the row to convert
 * @param id the id passed through to {@code createTrainingRow}
 * @return the training row created from the non-zero features of {@code row}
 * @throws IllegalStateException if a nominal cell is not contained in its column domain, if a vector
 *             cell is of an unsupported vector type or if a cell is of an incompatible type
 */
@Override
public T build(final DataRow row, final int id) {
    // the intercept feature is always present and occupies the first slot
    m_nonZeroIndices[0] = 0;
    m_nonZeroValues[0] = 1.0F;
    // number of slots of the scratch arrays that are in use
    int filled = 1;
    // feature index at which the features of the current cell start
    int featureOffset = 1;
    for (int featureNo = 0; featureNo < m_featureCellIndices.size(); featureNo++) {
        final Integer columnIdx = m_featureCellIndices.get(featureNo);
        final DataCell current = row.getCell(columnIdx);
        final DataType type = current.getType();

        if (type.isCompatible(NominalValue.class)) {
            // nominal cell: one-hot encoding relative to the first domain value
            final List<DataCell> domain = m_nominalDomainValues.get(columnIdx);
            final int position = domain.indexOf(current);
            if (position == -1) {
                throw new IllegalStateException("DataCell \"" + current.toString() + "\" is not in the DataColumnDomain. Please apply a " + "Domain Calculator on the columns with nominal values.");
            }
            if (position > 0) {
                m_nonZeroIndices[filled] = featureOffset + position - 1;
                m_nonZeroValues[filled] = 1.0F;
                filled++;
            }
            featureOffset += domain.size() - 1;
            continue;
        }

        if (m_vectorLengths.containsKey(columnIdx)) {
            // vector cell: every set bit / non-zero count becomes one sparse entry
            if (type.isCompatible(BitVectorValue.class)) {
                final BitVectorValue bits = (BitVectorValue) current;
                long bit = bits.nextSetBit(0L);
                while (bit >= 0) {
                    m_nonZeroIndices[filled] = (int) (featureOffset + bit);
                    m_nonZeroValues[filled] = 1.0F;
                    filled++;
                    bit = bits.nextSetBit(bit + 1);
                }
            } else if (type.isCompatible(ByteVectorValue.class)) {
                final ByteVectorValue counts = (ByteVectorValue) current;
                long pos = counts.nextCountIndex(0L);
                while (pos >= 0) {
                    m_nonZeroIndices[filled] = (int) (featureOffset + pos);
                    m_nonZeroValues[filled] = counts.get(pos);
                    filled++;
                    pos = counts.nextCountIndex(pos + 1);
                }
                // uncomment once DoubleVectors can be used with PMML
                // } else if (cellType.isCompatible(DoubleVectorValue.class)) {
                // // DoubleVectorValue also implements CollectionDataValue but
                // // as it then first boxes its values into DataCells, it is much more
                // // efficient to access its values via the DoubleVectorValue interface
                // DoubleVectorValue dv = (DoubleVectorValue)cell;
                // for (int s = 0; s < dv.getLength(); s++) {
                // float val = (float)dv.getValue(s);
                // if (!MathUtils.equals(val, 0.0)) {
                // m_nonZeroIndices[nonZeroFeatures] = accumulatedIdx + s;
                // m_nonZeroValues[nonZeroFeatures++] = val;
                // }
                // }
                // uncomment once double lists become compatible with PMML
                // } else if (cellType.isCollectionType() && cellType.getCollectionElementType().isCompatible(DoubleValue.class)) {
                // CollectionDataValue cv = (CollectionDataValue)cell;
                // int s = 0;
                // for (DataCell c : cv) {
                // // we already checked above that cv contains DoubleValues
                // DoubleValue dv = (DoubleValue)c;
                // double val = dv.getDoubleValue();
                // if (!MathUtils.equals(val, 0.0)) {
                // m_nonZeroIndices[nonZeroFeatures] = accumulatedIdx + s;
                // m_nonZeroValues[nonZeroFeatures] = (float)val;
                // }
                // s++;
                // }
            } else {
                // should never be thrown because we check the compatibility in the constructor
                throw new IllegalStateException("DataCell \"" + current.toString() + "\" is of an unknown vector/collections type.");
            }
            featureOffset += m_vectorLengths.get(columnIdx);
            continue;
        }

        if (type.isCompatible(DoubleValue.class)) {
            // numerical cell: stored only if it differs from zero
            final double numeric = ((DoubleValue) current).getDoubleValue();
            if (!MathUtils.equals(numeric, 0.0)) {
                m_nonZeroIndices[filled] = featureOffset;
                m_nonZeroValues[filled] = (float) numeric;
                filled++;
            }
            featureOffset++;
            continue;
        }

        // a different DataCell of incompatible type.
        throw new IllegalStateException("The DataCell \"" + current.toString() + "\" is of incompatible type \"" + type.toPrettyString() + "\".");
    }
    // only the used prefix of the scratch arrays is handed over
    return createTrainingRow(row, Arrays.copyOf(m_nonZeroIndices, filled), Arrays.copyOf(m_nonZeroValues, filled), id);
}
Use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.
Class SubgroupMinerModel2, method preprocess.
/**
 * Collects the bit vectors of the configured transaction column.
 *
 * <p>Rows whose transaction cell is missing are skipped. For every remaining row the bit vector is
 * appended to the result, its position in the result is mapped to the row key in
 * {@code tidRowKeyMapping} and {@code maxBitsetLength} is raised to the longest vector length seen.
 *
 * @param inData the input table; it is cast to {@code BufferedDataTable} to read the row count used
 *            for progress reporting
 * @param exec used for cancellation checks and progress reporting
 * @param tidRowKeyMapping receives the mapping from transaction id (position in the returned list)
 *            to row key
 * @param maxBitsetLength updated to the maximum of its current value and all vector lengths
 * @return the bit vectors of all rows with a non-missing transaction cell; an empty list if the
 *         transaction column is not part of the table
 * @throws CanceledExecutionException if the execution was canceled
 * @throws IllegalArgumentException if a bit vector is longer than {@link Integer#MAX_VALUE}
 */
private List<BitVectorValue> preprocess(final DataTable inData, final ExecutionMonitor exec, final Map<Integer, RowKey> tidRowKeyMapping, final AtomicInteger maxBitsetLength) throws CanceledExecutionException {
    int nrOfRows = 0;
    int totalNrRows = ((BufferedDataTable) inData).getRowCount();
    List<BitVectorValue> bitSets = new ArrayList<BitVectorValue>();
    int bitVectorIndex = inData.getDataTableSpec().findColumnIndex(m_transactionColumn.getStringValue());
    if (bitVectorIndex < 0) {
        // column not found: nothing to collect, hand back the (still empty) result list
        return bitSets;
    }
    for (DataRow currRow : inData) {
        exec.checkCanceled();
        DataCell dc = currRow.getCell(bitVectorIndex);
        if (dc.isMissing()) {
            continue;
        }
        // reuse the cell fetched above instead of looking it up in the row a second time
        BitVectorValue currCell = (BitVectorValue) dc;
        // length is needed for the range check, the message and the maximum; query it once
        long currLength = currCell.length();
        if (currLength > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("bit vector in row " + currRow.getKey().getString() + " is too long: " + currLength + ". Only bit vectors up to " + Integer.MAX_VALUE + " are supported by this node.");
        }
        // the narrowing cast is safe because of the range check above
        maxBitsetLength.set(Math.max(maxBitsetLength.get(), (int) currLength));
        bitSets.add(currCell);
        tidRowKeyMapping.put(nrOfRows, currRow.getKey());
        nrOfRows++;
        exec.setProgress((double) nrOfRows / (double) totalNrRows, "preprocessing..." + nrOfRows);
    }
    LOGGER.debug("max length: " + maxBitsetLength.get());
    return bitSets;
}
Use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.
Class ArrayApriori, method findFrequentItemSets.
/**
 * Mines the frequent itemsets level by level. For each level the tree is
 * descended down to the current build level, where the items present in the
 * transaction are counted. Consequently only items for which a path exists
 * in the tree, i.e. items with frequent predecessors, can be counted. After
 * counting, children are created for the itemsets that might become
 * frequent in the next level (itemsets with one more item). The loop ends
 * when no child was created or {@code maxDepth} levels have been built.
 *
 * {@inheritDoc}
 */
@Override
public void findFrequentItemSets(final List<BitVectorValue> transactions, final double minSupport, final int maxDepth, final FrequentItemSet.Type type, final ExecutionMonitor exec) throws CanceledExecutionException {
    m_minSupport = minSupport;
    m_dbsize = transactions.size();
    findFrequentItems(transactions);
    m_root = new ArrayPrefixTreeNode(m_compressedLength, null, -1);
    m_builtLevel = 0;
    boolean buildNextLevel;
    do {
        // counting pass over all transactions for the current level
        m_transactionNr = 0;
        for (final BitVectorValue transaction : transactions) {
            exec.checkCanceled();
            // transactions without any set bit are neither counted nor numbered
            if (transaction.cardinality() != 0) {
                count(transaction, m_root, 0, 0);
                m_transactionNr++;
            }
        }
        // extend the tree by one level; createChildren is expected to raise the flag again
        m_childCreated = false;
        createChildren(m_root, 0, 0, exec);
        m_builtLevel++;
        final double progress = 1.0 - (1.0 / m_builtLevel);
        exec.setProgress(progress, "building level: " + m_builtLevel);
        buildNextLevel = m_childCreated && m_builtLevel < maxDepth;
    } while (buildNextLevel);
}
Use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.
Class ExpandBitVectorNodeModel, method createCellFactory.
/**
 * Creates a cell factory that expands the bit vector found at {@code inputIndex} into one cell per
 * output column: column {@code i} receives {@code VALUES[1]} if bit {@code i} is set and
 * {@code VALUES[0]} otherwise. Columns beyond the length of the bit vector, and all columns if the
 * input cell is not a {@link BitVectorValue}, receive a missing cell.
 *
 * {@inheritDoc}
 */
@Override
protected AbstractCellFactory createCellFactory(final String[] colNames, final DataColumnSpec[] outputColumns, final int inputIndex) {
    return new AbstractCellFactory(outputColumns) {
        @Override
        public DataCell[] getCells(final DataRow row) {
            DataCell[] vs = new DataCell[colNames.length];
            DataCell cell = row.getCell(inputIndex);
            if (cell instanceof BitVectorValue) {
                BitVectorValue bvv = (BitVectorValue) cell;
                // Take the minimum in long arithmetic and narrow afterwards: casting the long
                // length to int first would wrap around for vectors longer than
                // Integer.MAX_VALUE and wrongly turn the columns into missing cells.
                int length = (int) Math.min(vs.length, bvv.length());
                // bits covered by the vector
                for (int i = length; i-- > 0; ) {
                    vs[i] = VALUES[bvv.get(i) ? 1 : 0];
                }
                // remaining columns have no corresponding bit
                for (int i = vs.length; i-- > length; ) {
                    vs[i] = DataType.getMissingCell();
                }
            } else {
                // not a bit vector (e.g. a missing cell): nothing to expand
                for (int i = 0; i < vs.length; i++) {
                    vs[i] = DataType.getMissingCell();
                }
            }
            return vs;
        }
    };
}
Use of org.knime.core.data.vector.bitvector.BitVectorValue in project knime-core by knime.
Class BitVectorIntersectionOperator, method computeInternal.
/**
 * Folds the given cell into the running intersection held in {@code m_v}. The first bit vector
 * seen is copied bit by bit into a new dense bit vector cell; every later bit vector is combined
 * with the current result via {@code DenseBitVectorCellFactory.and}. Cells that are not a
 * {@link BitVectorValue} are ignored.
 *
 * {@inheritDoc}
 *
 * @return always {@code false}
 */
@Override
protected boolean computeInternal(final DataCell cell) {
    if (!(cell instanceof BitVectorValue)) {
        return false;
    }
    final BitVectorValue bits = (BitVectorValue) cell;
    if (m_v != null) {
        // a result already exists: intersect it with the new vector
        m_v = DenseBitVectorCellFactory.and(m_v, bits);
        return false;
    }
    // first vector: start the result as a dense copy of it
    final DenseBitVectorCellFactory copy = new DenseBitVectorCellFactory(bits.length());
    for (long idx = bits.nextSetBit(0); idx >= 0; idx = bits.nextSetBit(idx + 1)) {
        copy.set(idx);
    }
    m_v = copy.createDataCell();
    return false;
}
Aggregations