use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.
the class CollectionSplitNodeModel method countNewColumns.
/**
 * Scans all rows of the given table, finds the largest collection stored in
 * the target column and creates one column spec per element position.
 *
 * <p>The new columns are named "Split Value N" (N starting at 1). If such a
 * name is already taken, a "(#k)" suffix with increasing k is appended until
 * the name is unique. The name of the target column counts as free when the
 * settings ask for the input column to be replaced.
 *
 * @param table the input table to scan
 * @param exec monitor that receives progress updates and is polled for cancellation
 * @return the specs of the columns to create; its length equals the maximum
 *         collection size found (zero if no non-missing collection was seen)
 * @throws InvalidSettingsException declared for the target column lookup
 *         (presumably raised by getTargetColIndex when the column cannot be resolved)
 * @throws CanceledExecutionException if the monitor reports cancellation
 */
private DataColumnSpec[] countNewColumns(final BufferedDataTable table, final ExecutionMonitor exec) throws InvalidSettingsException, CanceledExecutionException {
    final DataTableSpec inSpec = table.getDataTableSpec();
    final long totalRows = table.size();
    final int collectionIdx = getTargetColIndex(inSpec);

    // pass 1: determine the widest collection in the target column
    int widest = 0;
    long rowsSeen = 0;
    for (DataRow current : table) {
        final DataCell cell = current.getCell(collectionIdx);
        if (!cell.isMissing()) {
            final int elementCount = ((CollectionDataValue) cell).size();
            if (elementCount > widest) {
                widest = elementCount;
            }
        }
        // the fraction is based on the rows finished before this one,
        // the message shows the 1-based index of the current row
        final double fraction = rowsSeen / (double) totalRows;
        rowsSeen++;
        exec.setProgress(fraction, "Determining maximum element count, row \"" + current.getKey() + "\" (" + rowsSeen + "/" + totalRows + ")");
        exec.checkCanceled();
    }

    // collect the names that are already occupied in the output table
    final DataColumnSpec collectionSpec = inSpec.getColumnSpec(collectionIdx);
    final HashSet<String> takenNames = new HashSet<String>();
    for (DataColumnSpec existing : inSpec) {
        takenNames.add(existing.getName());
    }
    if (m_settings.isReplaceInputColumn()) {
        // the replaced column disappears, so its name may be reused
        takenNames.remove(collectionSpec.getName());
    }

    // pass 2: create one uniquely named spec per element position
    final DataType elementType = collectionSpec.getType().getCollectionElementType();
    final DataColumnSpec[] result = new DataColumnSpec[widest];
    for (int pos = 0; pos < widest; pos++) {
        final String baseName = "Split Value " + (pos + 1);
        String candidate = baseName;
        // Set.add returns false for a name that is already present
        for (int suffix = 1; !takenNames.add(candidate); suffix++) {
            candidate = baseName + "(#" + suffix + ")";
        }
        result[pos] = new DataColumnSpecCreator(candidate, elementType).createSpec();
    }
    return result;
}
use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.
the class UngroupOperation2 method compute.
/**
* Performs the ungroup operation on the given row input and pushes the result to the row output.
*
* <p>For every input row the cells in the selected columns ({@code m_colIndices}) that are collections are
* iterated in parallel. Each pass over the iterators yields one output row whose key is the input row key
* followed by {@code "_"} and a counter starting at 1. A column whose iterator is exhausted, or whose cell is not
* a collection, is represented by a missing cell in the cell array handed to {@code createClone}.
*
* <p>If none of the selected cells of a row is a collection, the row is pushed once under its original key with
* missing cells as replacement, unless {@code m_skipMissingValues} is set, in which case it is dropped.
*
* @param in the row input, will NOT be closed when finished
* @param out the row output, will NOT be closed when finished
* @param exec the execution context to check cancellation and (optional) progress logging
* @param rowCount row count to track the progress or <code>-1</code> without progress tracking
* @param trans the hilite translator, will be modified directly. Must be non-null if hiliting is enabled, can be
* <code>null</code> otherwise
* @throws CanceledExecutionException if the execution has been canceled
* @throws InterruptedException if the execution has been interrupted
* @throws IllegalArgumentException if hiliting is enabled and no hilite translator is given
*/
public void compute(final RowInput in, final RowOutput out, final ExecutionContext exec, final long rowCount, final HiLiteTranslator trans) throws CanceledExecutionException, InterruptedException {
if (m_enableHilite && trans == null) {
throw new IllegalArgumentException("HiLiteTranslator must not be null when hiliting is enabled!");
}
// input row key -> keys of the output rows created from it; only filled when hiliting is enabled
final Map<RowKey, Set<RowKey>> hiliteMapping = new HashMap<RowKey, Set<RowKey>>();
// one iterator slot per selected column, overwritten for every input row
@SuppressWarnings("unchecked") Iterator<DataCell>[] iterators = new Iterator[m_colIndices.length];
// replacement cells for rows in which none of the selected cells is a collection
final DataCell[] missingCells = new DataCell[m_colIndices.length];
Arrays.fill(missingCells, DataType.getMissingCell());
long rowCounter = 0;
DataRow row = null;
// poll() returning null ends the loop
while ((row = in.poll()) != null) {
rowCounter++;
exec.checkCanceled();
// progress is only reported for a positive row count
if (rowCount > 0) {
exec.setProgress(rowCounter / (double) rowCount, "Processing row " + rowCounter + " of " + rowCount);
}
// open an iterator for every selected cell that is a collection; any other cell gets a null slot
boolean allMissing = true;
for (int i = 0, length = m_colIndices.length; i < length; i++) {
final DataCell cell = row.getCell(m_colIndices[i]);
final CollectionDataValue listCell;
final Iterator<DataCell> iterator;
if (cell instanceof CollectionDataValue) {
listCell = (CollectionDataValue) cell;
iterator = listCell.iterator();
allMissing = false;
} else {
iterator = null;
}
iterators[i] = iterator;
}
if (allMissing) {
// no collection in this row: unless missing values are skipped, push the row once under its
// original key, built from the missing-cell array
if (!m_skipMissingValues) {
final DefaultRow newRow = createClone(row.getKey(), row, m_colIndices, m_removeCollectionCol, missingCells);
if (m_enableHilite) {
// create the hilite entry (the row maps to itself because the key is unchanged)
final Set<RowKey> keys = new HashSet<RowKey>(1);
keys.add(row.getKey());
hiliteMapping.put(row.getKey(), keys);
}
out.push(newRow);
}
continue;
}
// suffix of the next output row key for this input row
long counter = 1;
// keys of the output rows created from this input row; null when hiliting is disabled
final Set<RowKey> keys;
if (m_enableHilite) {
keys = new HashSet<RowKey>();
} else {
keys = null;
}
// continueLoop: at least one iterator delivered an element in the current pass
boolean continueLoop = false;
// allEmpty: stays true only while no iterator has delivered an element and no slot is null,
// i.e. every selected cell is an empty collection
boolean allEmpty = true;
do {
// reset the per-pass flags
allMissing = true;
continueLoop = false;
final DataCell[] newCells = new DataCell[iterators.length];
for (int i = 0, length = iterators.length; i < length; i++) {
Iterator<DataCell> iterator = iterators[i];
DataCell newCell;
if (iterator != null && iterator.hasNext()) {
allEmpty = false;
continueLoop = true;
newCell = iterator.next();
} else {
if (iterator == null) {
allEmpty = false;
}
// exhausted iterator or non-collection cell: pad with a missing cell
newCell = DataType.getMissingCell();
}
if (!newCell.isMissing()) {
allMissing = false;
}
newCells[i] = newCell;
}
// every iterator is exhausted and this is not the all-empty-collections case: nothing left to emit
if (!allEmpty && !continueLoop) {
break;
}
// the pass produced only missing elements: drop it when missing values are skipped
// (continue in a do-while still evaluates the loop condition)
if (!allEmpty && allMissing && m_skipMissingValues) {
continue;
}
// reached with allEmpty still true as well: one row of missing cells is then emitted regardless of
// m_skipMissingValues
final RowKey oldKey = row.getKey();
final RowKey newKey = new RowKey(oldKey.getString() + "_" + counter++);
final DefaultRow newRow = createClone(newKey, row, m_colIndices, m_removeCollectionCol, newCells);
out.push(newRow);
if (keys != null) {
keys.add(newKey);
}
} while (continueLoop);
// register the hilite mapping only if at least one output row was created
if (keys != null && !keys.isEmpty()) {
hiliteMapping.put(row.getKey(), keys);
}
}
// trans is non-null here when hiliting is enabled (checked at method entry)
if (m_enableHilite) {
trans.setMapper(new DefaultHiLiteMapper(hiliteMapping));
}
}
use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.
the class CellReplacerNodeModel method createColumnRearranger.
/**
 * Builds the column rearranger that replaces (or appends next to) the target
 * column with values looked up in a dictionary table.
 *
 * <p>The dictionary is read into a hash map lazily, on the first call of the
 * cell factory. Dictionary keys come from the dictionary input column (or the
 * row ID); if that column is a collection, every element of a non-missing
 * collection becomes a key of its own. Values come from the dictionary output
 * column (or the row ID).
 *
 * @param spec spec of the table whose cells are replaced
 * @param dictSpec spec of the dictionary table
 * @param dictTable the dictionary table, read when the first cell is computed
 * @param dictionaryInitExec monitor for progress/cancellation while reading the dictionary
 * @return a rearranger that appends or replaces the target column
 * @throws InvalidSettingsException if the target column, a dictionary column
 *         or the name of the appended column is missing or unknown
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec spec, final DataTableSpec dictSpec, final BufferedDataTable dictTable, final ExecutionMonitor dictionaryInitExec) throws InvalidSettingsException {
    // --- target column -------------------------------------------------
    final String targetName = m_targetColModel.getStringValue();
    if (targetName == null || targetName.isEmpty()) {
        throw new InvalidSettingsException("No target column selected");
    }
    final int targetColIndex = spec.findColumnIndex(targetName);
    if (targetColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + targetName + "\"");
    }
    final DataColumnSpec targetColSpec = spec.getColumnSpec(targetColIndex);

    // --- dictionary lookup (key) column ----------------------------------
    final String dictInputName = m_dictInputColModel.getStringValue();
    final int dictInputColIndex = dictSpec.findColumnIndex(dictInputName);
    final boolean keyIsRowId = m_dictInputColModel.useRowID();
    if (!keyIsRowId && dictInputColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + dictInputName + "\"");
    }
    final boolean dictInputIsCollection = !keyIsRowId
        && dictSpec.getColumnSpec(dictInputColIndex).getType().isCollectionType();

    // --- dictionary output (value) column --------------------------------
    final String dictOutputName = m_dictOutputColModel.getStringValue();
    final int dictOutputColIndex = dictSpec.findColumnIndex(dictOutputName);
    final boolean valueIsRowId = m_dictOutputColModel.useRowID();
    if (!valueIsRowId && dictOutputColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + dictOutputName + "\"");
    }
    final DataType dictOutputColType = valueIsRowId
        ? StringCell.TYPE : dictSpec.getColumnSpec(dictOutputColIndex).getType();

    // --- type of the resulting column ------------------------------------
    final NoMatchPolicy noMatchPolicy = getNoMatchPolicy();
    final DataType outputType;
    switch (noMatchPolicy) {
        case Input:
            // unmatched cells keep their input value, so both types must fit
            outputType = DataType.getCommonSuperType(dictOutputColType, targetColSpec.getType());
            break;
        default:
            outputType = dictOutputColType;
            break;
    }
    final boolean keepInputOnMiss = noMatchPolicy == NoMatchPolicy.Input;

    // --- name of the resulting column ------------------------------------
    final boolean appendColumn = m_appendColumnModel.getBooleanValue();
    final String newColName;
    if (appendColumn) {
        final String requestedName = m_appendColumnNameModel.getStringValue();
        if (requestedName == null || requestedName.isEmpty()) {
            throw new InvalidSettingsException("No new column name given");
        }
        newColName = DataTableSpec.getUniqueColumnName(spec, requestedName);
    } else {
        newColName = targetColSpec.getName();
    }

    final DataColumnSpec outputSpec = new DataColumnSpecCreator(newColName, outputType).createSpec();
    final CellFactory factory = new SingleCellFactory(outputSpec) {

        /** Lookup table, created on first use by {@link #ensureInitDictionaryMap()}. */
        private Map<DataCell, DataCell> m_dictionaryMap;

        /** Returns the dictionary value for the target cell, or the no-match fallback. */
        @Override
        public DataCell getCell(final DataRow row) {
            try {
                ensureInitDictionaryMap();
            } catch (CanceledExecutionException e) {
                // cancellation done by the framework
                return DataType.getMissingCell();
            }
            final DataCell original = row.getCell(targetColIndex);
            final DataCell replacement = m_dictionaryMap.get(original);
            if (replacement != null) {
                return replacement;
            }
            return keepInputOnMiss ? original : DataType.getMissingCell();
        }

        /** Reads the dictionary table into the map unless that has been started before. */
        private void ensureInitDictionaryMap() throws CanceledExecutionException {
            if (m_dictionaryMap != null) {
                return;
            }
            m_dictionaryMap = new HashMap<DataCell, DataCell>();
            final double total = dictTable.size();
            int rowNo = 0;
            for (DataRow dictRow : dictTable) {
                // fraction from rows already read, message with the 1-based row number
                final double fraction = rowNo / total;
                rowNo++;
                dictionaryInitExec.setProgress(fraction, "Reading dictionary into memory, row " + rowNo);
                dictionaryInitExec.checkCanceled();
                // a negative index means no such column was found; the row ID is used instead
                final DataCell value = dictOutputColIndex < 0
                    ? new StringCell(dictRow.getKey().getString()) : dictRow.getCell(dictOutputColIndex);
                final DataCell key = dictInputColIndex < 0
                    ? new StringCell(dictRow.getKey().getString()) : dictRow.getCell(dictInputColIndex);
                if (!key.isMissing() && dictInputIsCollection) {
                    // every element of the collection is a search key of its own
                    final CollectionDataValue elements = (CollectionDataValue) key;
                    for (DataCell element : elements) {
                        addSearchPair(element, value);
                    }
                } else {
                    // plain cell, or a missing cell which is registered as a key itself
                    addSearchPair(key, value);
                }
            }
        }

        /** Registers a key/value pair and warns if the key replaced an earlier mapping. */
        private void addSearchPair(final DataCell input, final DataCell output) {
            final DataCell previous = m_dictionaryMap.put(input, output);
            if (previous != null) {
                setWarningMessage("Duplicate search key \"" + input + "\"");
            }
        }
    };

    final ColumnRearranger rearranger = new ColumnRearranger(spec);
    if (appendColumn) {
        rearranger.append(factory);
    } else {
        rearranger.replace(factory, targetColIndex);
    }
    return rearranger;
}
use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.
the class MissingValueRowFilter method matches.
/**
 * {@inheritDoc}
 *
 * <p>The cell in the filter column is tested with the cell-level
 * {@code matches} check. If that fails, deep filtering is enabled and the
 * cell is a collection, the collection is tested via
 * {@code performDeepFiltering}. The row is accepted when the outcome agrees
 * with the include flag.
 */
@Override
public boolean matches(final DataRow row, final long rowIndex) throws EndOfTableException, IncludeFromNowOn {
    assert getColIdx() >= 0;
    final DataCell candidate = row.getCell(getColIdx());
    // short-circuit: the deep check only runs when the plain check failed
    final boolean hit = matches(candidate)
        || (getDeepFiltering()
            && candidate instanceof CollectionDataValue
            && performDeepFiltering((CollectionDataValue) candidate));
    // include mode keeps hits, exclude mode keeps non-hits
    return getInclude() == hit;
}
use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.
the class RangeRowFilter method matches.
/**
 * {@inheritDoc}
 *
 * <p>A missing cell never counts as a hit. Otherwise the cell is tested via
 * {@code performDeepFiltering} when deep filtering is enabled and the cell is
 * a collection, and with the cell-level {@code matches} check in all other
 * cases. The row is accepted when the outcome agrees with the include flag.
 */
@Override
public boolean matches(final DataRow row, final long rowIndex) throws EndOfTableException, IncludeFromNowOn {
    assert getColIdx() >= 0;
    assert m_comparator != null;
    final DataCell candidate = row.getCell(getColIdx());
    final boolean hit;
    if (candidate.isMissing()) {
        hit = false;
    } else if (getDeepFiltering() && candidate instanceof CollectionDataValue) {
        hit = performDeepFiltering((CollectionDataValue) candidate);
    } else {
        hit = matches(candidate);
    }
    // include mode keeps hits, exclude mode keeps non-hits
    return getInclude() == hit;
}
Aggregations