use of org.knime.base.data.bitvector.BitVectorCellFactory in project knime-core by knime.
the class CreateBitVectorNodeModel method createSingleColumnRearranger.
/**
 * Builds the column rearranger for the single-column case when only the table spec is available.
 * <p>
 * The cell factory is obtained from {@link #getSingleColFactory} with {@code null} passed for both the
 * execution monitor and the data table, so no data-dependent preprocessing is performed here.
 * Depending on the "remove" setting the factory either replaces the column at {@code colIdx} or its
 * output is appended to the table.
 *
 * @param spec the input table spec the rearranger is based on
 * @param colIdx index of the column the factory reads from
 * @param columnType the configured single-column type
 * @param vectorType the type of bit vector to create
 * @return the configured column rearranger
 * @throws InvalidSettingsException propagated from {@link #getSingleColFactory}
 */
private ColumnRearranger createSingleColumnRearranger(final DataTableSpec spec, final int colIdx, final ColumnType columnType, final BitVectorType vectorType) throws InvalidSettingsException {
    try {
        final BitVectorCellFactory cellFactory = getSingleColFactory(null, colIdx, spec, null, columnType, vectorType);
        final ColumnRearranger rearranger = new ColumnRearranger(spec);
        if (!m_remove.getBooleanValue()) {
            // keep the source column and add the bit vector column at the end
            rearranger.append(cellFactory);
        } else {
            // the bit vector column takes the place of the source column
            rearranger.replace(cellFactory, colIdx);
        }
        return rearranger;
    } catch (CanceledExecutionException cancelled) {
        // not expected: no data is handed over, so the preprocessing that could be canceled is not run
        throw new RuntimeException(cancelled);
    }
}
use of org.knime.base.data.bitvector.BitVectorCellFactory in project knime-core by knime.
the class CreateBitVectorNodeModel method createMultiColumnCellFactory.
/**
 * Creates the cell factory used when the bit vector is built from several input columns.
 * <p>
 * The included column names are resolved to their indices in the input table first. For
 * {@code MULTI_NUMERICAL} the factory is either mean based (the mean values are computed up front on a
 * sub progress of 0.5) or based on the fixed threshold setting. For {@code MULTI_STRING} a pattern
 * matching factory is created from the corresponding settings.
 *
 * @param data the input table
 * @param exec execution context, used to create the sub progress for the mean calculation
 * @param columnType the configured multi-column type
 * @param vectorType the type of bit vector to create
 * @param multiCols names of the columns to include
 * @return the cell factory for the given column type
 * @throws CanceledExecutionException declared for the mean value calculation
 * @throws InvalidSettingsException declared for the creation of the output column spec
 * @throws IllegalArgumentException if one of the included columns is not part of the input table
 * @throws IllegalStateException if the column type is neither {@code MULTI_NUMERICAL} nor
 *             {@code MULTI_STRING}
 */
private BitVectorCellFactory createMultiColumnCellFactory(final BufferedDataTable data, final ExecutionContext exec, final ColumnType columnType, final BitVectorType vectorType, final String[] multiCols) throws CanceledExecutionException, InvalidSettingsException {
    final DataColumnSpec outputSpec = createMultiColumnOutputSpec(data.getDataTableSpec(), multiCols, vectorType);

    // resolve every included column name to its position in the input table
    final int[] positions = new int[multiCols.length];
    for (int i = 0; i < multiCols.length; i++) {
        final String name = multiCols[i];
        final int position = data.getDataTableSpec().findColumnIndex(name);
        if (position < 0) {
            throw new IllegalArgumentException("Column " + name + " is not available in input table. Please re-configure the node.");
        }
        positions[i] = position;
    }

    if (ColumnType.MULTI_NUMERICAL.equals(columnType)) {
        if (!m_useMean.getBooleanValue()) {
            // bits depend on a fixed threshold
            return new Numeric2BitVectorThresholdCellFactory(vectorType, outputSpec, m_threshold.getDoubleValue(), positions);
        }
        // bits depend on a percentage of the per-column mean
        final double meanFactor = m_meanPercentage.getIntValue() / 100.0;
        final double[] meanValues = calculateMeanValues(exec.createSubProgress(0.5), data, positions);
        return new Numeric2BitVectorMeanCellFactory(vectorType, outputSpec, meanFactor, meanValues, positions);
    }

    if (ColumnType.MULTI_STRING.equals(columnType)) {
        final boolean setMatching = SetMatching.MATCHING.equals(SetMatching.get(m_mscSetMatching.getStringValue()));
        final boolean caseSensitive = m_mscCaseSensitiv.getBooleanValue();
        final boolean hasWildcards = m_mscHasWildcards.getBooleanValue();
        final boolean isRegex = m_mscRegex.getBooleanValue();
        final String pattern = m_mscPattern.getStringValue();
        return new MultiString2BitVectorCellFactory(vectorType, outputSpec, caseSensitive, hasWildcards, isRegex, setMatching, pattern, positions);
    }

    throw new IllegalStateException("Not implemeted column type " + columnType.getText());
}
use of org.knime.base.data.bitvector.BitVectorCellFactory in project knime-core by knime.
the class CreateBitVectorNodeModel method getSingleColFactory.
/**
 * Creates the cell factory that converts the single column at {@code colIdx} into a bit vector column
 * named after the output column setting.
 * <p>
 * When {@code data} is {@code null} (only the spec is known) no table scan is performed: the
 * {@code ID} string factory is created with a maximum position of 0 and the collection factory with an
 * empty element index map. When {@code data} is given, the required scan runs on a sub progress of 0.5
 * of {@code exec}, so {@code exec} must not be {@code null} in that case.
 *
 * @param exec monitor for progress and cancellation; only used if {@code data} is not {@code null}
 * @param colIdx index of the column to convert
 * @param spec the input table spec (not read by this method)
 * @param data the input table or {@code null} if no data is available
 * @param columnType the configured column type, expected to be {@code SINGLE_STRING} or
 *            {@code SINGLE_COLLECTION}
 * @param vectorType the type of bit vector to create
 * @return the cell factory for the given column type
 * @throws InvalidSettingsException if the configured string type is not handled
 * @throws CanceledExecutionException if the scan of the table is canceled
 * @throws IllegalStateException if {@code columnType} is not a supported single-column type
 */
private BitVectorCellFactory getSingleColFactory(final ExecutionMonitor exec, final int colIdx, final DataTableSpec spec, final BufferedDataTable data, final ColumnType columnType, final BitVectorType vectorType) throws InvalidSettingsException, CanceledExecutionException {
    final String outColName = m_outputColumn.getStringValue();
    final DataColumnSpecCreator creator = new DataColumnSpecCreator(outColName, vectorType.getCellDataType());
    final BitVectorCellFactory factory;
    if (ColumnType.SINGLE_STRING.equals(columnType)) {
        final StringType singleStringColType = StringType.getType(m_singleStringColumnType.getStringValue());
        final DataColumnSpec colSpec = creator.createSpec();
        switch (singleStringColType) {
            case BIT:
                factory = new BitString2BitVectorCellFactory(vectorType, colSpec, colIdx);
                break;
            case HEX:
                factory = new Hex2BitVectorCellFactory(vectorType, colSpec, colIdx);
                break;
            case ID: {
                final int maxPosition;
                if (data != null) {
                    // the maximum position is determined by a scan over the table
                    final ExecutionMonitor scanExec = exec.createSubProgress(0.5);
                    exec.setMessage("preparing");
                    maxPosition = scanMaxPos(data, scanExec);
                } else {
                    maxPosition = 0;
                }
                factory = new IdString2BitVectorCellFactory(vectorType, colSpec, colIdx, maxPosition);
                break;
            }
            default:
                throw new InvalidSettingsException("String type to parse bit vectors from unknown type " + singleStringColType.getActionCommand());
        }
    } else if (ColumnType.SINGLE_COLLECTION.equals(columnType)) {
        final Map<String, Integer> idxMap;
        if (data != null) {
            final ExecutionMonitor scanExec = exec.createSubProgress(0.5);
            scanExec.setMessage("preparing");
            // element names in order of first occurrence; the list position equals the mapped index
            final List<String> elementNames = new ArrayList<>();
            idxMap = new HashMap<>();
            final long nrRows = data.size();
            long currRow = 0;
            for (DataRow row : data) {
                currRow++;
                scanExec.setProgress((double) currRow / (double) nrRows, "Counting uniqe elements. Processing row " + currRow + " of " + nrRows);
                scanExec.checkCanceled();
                final DataCell cell = row.getCell(colIdx);
                if (cell.isMissing()) {
                    // ignore missing cells
                    continue;
                }
                if (!(cell instanceof CollectionDataValue)) {
                    throw new RuntimeException("Found incompatible type in row " + row.getKey().getString());
                }
                final CollectionDataValue collCell = (CollectionDataValue) cell;
                for (DataCell collVal : collCell) {
                    // elements are identified by their string representation
                    final String stringRep = collVal.toString();
                    if (!idxMap.containsKey(stringRep)) {
                        idxMap.put(stringRep, Integer.valueOf(idxMap.size()));
                        elementNames.add(stringRep);
                    }
                }
            }
            creator.setElementNames(elementNames.toArray(new String[0]));
        } else {
            // type-safe replacement for the raw Collections.EMPTY_MAP constant
            idxMap = Collections.emptyMap();
        }
        factory = new Collection2BitVectorCellFactory(vectorType, creator.createSpec(), colIdx, idxMap);
    } else {
        throw new java.lang.IllegalStateException("Single column type not implemented: " + columnType);
    }
    return factory;
}
use of org.knime.base.data.bitvector.BitVectorCellFactory in project knime-core by knime.
the class CreateBitVectorNodeModel method execute.
/**
 * Creates the bit vector column from the configured input column(s) and stores the statistics
 * reported by the cell factory (processed rows, number of set and not set bits) in member fields.
 * <p>
 * If the "remove" setting is enabled, the source column(s) are removed respectively replaced by the
 * bit vector column; otherwise the bit vector column is appended. If the factory reports that it was
 * not successful on a non-empty table, a warning message is set on the node.
 * <p>
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable data = inData[0];
    final DataTableSpec spec = data.getDataTableSpec();
    final ColumnType columnType = ColumnType.getType(m_columnType.getStringValue());
    final BitVectorType vectorType = BitVectorType.getType(m_vectorType.getStringValue());
    final boolean multiColumn = columnType.isMultiColumn();
    final String[] parsedColumnNames;
    final BitVectorCellFactory factory;
    if (multiColumn) {
        final FilterResult multiColFilter = m_multiColumnsConfig.applyTo(spec);
        parsedColumnNames = multiColFilter.getIncludes();
        factory = createMultiColumnCellFactory(data, exec, columnType, vectorType, parsedColumnNames);
    } else {
        final String singleColName = m_singleColumn.getStringValue();
        final int colIdx = spec.findColumnIndex(singleColName);
        if (colIdx < 0) {
            // fail with a clear message instead of passing an invalid index to the factory
            throw new InvalidSettingsException("Column " + singleColName + " is not available in input table. Please re-configure the node.");
        }
        factory = getSingleColFactory(exec, colIdx, spec, data, columnType, vectorType);
        parsedColumnNames = new String[] { singleColName };
    }
    final ColumnRearranger c = new ColumnRearranger(spec);
    if (m_remove.getBooleanValue()) {
        if (multiColumn) {
            c.remove(parsedColumnNames);
            c.append(factory);
        } else {
            c.replace(factory, m_singleColumn.getStringValue());
        }
    } else {
        c.append(factory);
    }
    factory.setFailOnError(m_failOnError.getBooleanValue());
    // The factory creation already used a sub progress of 0.5 when it had to scan the table first:
    // mean calculation for numeric columns, max position scan for ID strings, and element counting
    // for collections. Only in these cases the conversion gets the remaining half of the progress.
    final boolean meanPreScan = ColumnType.MULTI_NUMERICAL.equals(columnType) && m_useMean.getBooleanValue();
    final boolean idPreScan = ColumnType.SINGLE_STRING.equals(columnType)
        && StringType.ID.equals(StringType.getType(m_singleStringColumnType.getStringValue()));
    final boolean collectionPreScan = ColumnType.SINGLE_COLLECTION.equals(columnType);
    final ExecutionMonitor subExec;
    if (meanPreScan || idPreScan || collectionPreScan) {
        subExec = exec.createSubProgress(0.5);
    } else {
        subExec = exec;
    }
    final BufferedDataTable out = exec.createColumnRearrangeTable(data, c, subExec);
    if (!factory.wasSuccessful() && data.size() > 0) {
        final String errorMessage = factory.getNoOfPrintedErrors() + " errors found. Last message: " + factory.getLastErrorMessage() + ". See log file for details on all errors.";
        setWarningMessage(errorMessage);
    }
    m_nrOfProcessedRows = factory.getNrOfProcessedRows();
    m_totalNrOf0s = factory.getNumberOfNotSetBits();
    m_totalNrOf1s = factory.getNumberOfSetBits();
    return new BufferedDataTable[] { out };
}
Aggregations