Search in sources :

Example 1 with CollectionDataValue

use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.

the class CollectionSplitNodeModel method countNewColumns.

/**
 * Scans every row of the given table to find the largest collection in the
 * target column and creates one new column spec per element position.
 *
 * <p>New columns are named "Split Value n"; a "(#k)" suffix is appended until
 * the name does not collide with an existing column name (the target column's
 * own name is released first when the input column is to be replaced).
 *
 * @param table the table to scan
 * @param exec monitor used for progress reporting and cancellation checks
 * @return the specs of the columns to create, one per element position
 * @throws InvalidSettingsException declared for the target column lookup
 * @throws CanceledExecutionException if the execution was canceled while scanning
 */
private DataColumnSpec[] countNewColumns(final BufferedDataTable table, final ExecutionMonitor exec) throws InvalidSettingsException, CanceledExecutionException {
    final DataTableSpec tableSpec = table.getDataTableSpec();
    final long totalRows = table.size();
    final int targetColIndex = getTargetColIndex(tableSpec);

    // pass over the data: track the widest non-missing collection seen so far
    long processed = 0;
    int widest = 0;
    for (final DataRow row : table) {
        final DataCell cell = row.getCell(targetColIndex);
        if (!cell.isMissing()) {
            final int elementCount = ((CollectionDataValue) cell).size();
            if (elementCount > widest) {
                widest = elementCount;
            }
        }
        // the fraction uses the zero-based index, the message the one-based index
        final double fraction = processed / (double) totalRows;
        processed++;
        exec.setProgress(fraction, "Determining maximum element count, row \"" + row.getKey() + "\" (" + processed + "/" + totalRows + ")");
        exec.checkCanceled();
    }

    // collect the column names that are already taken
    final DataColumnSpec targetSpec = tableSpec.getColumnSpec(targetColIndex);
    final HashSet<String> takenNames = new HashSet<String>();
    for (final DataColumnSpec colSpec : tableSpec) {
        takenNames.add(colSpec.getName());
    }
    if (m_settings.isReplaceInputColumn()) {
        takenNames.remove(targetSpec.getName());
    }

    final DataType elementType = targetSpec.getType().getCollectionElementType();
    final DataColumnSpec[] result = new DataColumnSpec[widest];
    for (int pos = 0; pos < widest; pos++) {
        final String baseName = "Split Value " + (pos + 1);
        String candidate = baseName;
        // add() returns false on a clash; keep trying with an increasing suffix
        for (int suffix = 1; !takenNames.add(candidate); suffix++) {
            candidate = baseName + "(#" + suffix + ")";
        }
        result[pos] = new DataColumnSpecCreator(candidate, elementType).createSpec();
    }
    return result;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue) HashSet(java.util.HashSet)

Example 2 with CollectionDataValue

use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.

the class UngroupOperation2 method compute.

/**
 * Performs the ungroup operation on the given row input and pushes the result to the row output.
 *
 * <p>For every input row the selected collection cells are iterated in parallel: each pass takes the next
 * element from every collection that still has one (missing cells fill the gaps) and pushes one output row
 * whose key is the original key followed by "_" and a running counter starting at 1. Rows in which none of
 * the selected cells is a collection are pushed once under their original key, unless missing values are
 * skipped.
 *
 * @param in the row input, will NOT be closed when finished
 * @param out the row output, will NOT be closed when finished
 * @param exec the execution context to check cancellation and (optional) progress logging
 * @param rowCount row count to track the progress or <code>-1</code> without progress tracking (progress is
 *            only reported if the value is greater than zero)
 * @param trans the hilite translater, will be modified directly. Must be non-null if hiliting is enabled, can be
 *            <code>null</code> otherwise
 * @throws CanceledExecutionException if the execution has been canceled
 * @throws InterruptedException if the execution has been interrupted
 * @throws IllegalArgumentException if hiliting is enabled and no hilite translater is given
 */
public void compute(final RowInput in, final RowOutput out, final ExecutionContext exec, final long rowCount, final HiLiteTranslator trans) throws CanceledExecutionException, InterruptedException {
    if (m_enableHilite && trans == null) {
        throw new IllegalArgumentException("HiLiteTranslator must not be null when hiliting is enabled!");
    }
    // maps each input row key to the keys of the output rows created from it
    final Map<RowKey, Set<RowKey>> hiliteMapping = new HashMap<RowKey, Set<RowKey>>();
    // one iterator slot per selected column; generic array creation requires the unchecked suppression
    @SuppressWarnings("unchecked") Iterator<DataCell>[] iterators = new Iterator[m_colIndices.length];
    // replacement cells used for rows in which no selected cell is a collection
    final DataCell[] missingCells = new DataCell[m_colIndices.length];
    Arrays.fill(missingCells, DataType.getMissingCell());
    long rowCounter = 0;
    DataRow row = null;
    while ((row = in.poll()) != null) {
        rowCounter++;
        exec.checkCanceled();
        if (rowCount > 0) {
            exec.setProgress(rowCounter / (double) rowCount, "Processing row " + rowCounter + " of " + rowCount);
        }
        // stays true as long as none of the selected cells of this row is a collection
        boolean allMissing = true;
        for (int i = 0, length = m_colIndices.length; i < length; i++) {
            final DataCell cell = row.getCell(m_colIndices[i]);
            final CollectionDataValue listCell;
            final Iterator<DataCell> iterator;
            if (cell instanceof CollectionDataValue) {
                listCell = (CollectionDataValue) cell;
                iterator = listCell.iterator();
                allMissing = false;
            } else {
                // no collection in this column for this row (e.g. a missing cell): marked by a null iterator
                iterator = null;
            }
            iterators[i] = iterator;
        }
        if (allMissing) {
            // none of the selected cells is a collection: append a single row
            // with missing cells as well if the skip missing value option is disabled
            if (!m_skipMissingValues) {
                // the row keeps its original key since it is not split up
                final DefaultRow newRow = createClone(row.getKey(), row, m_colIndices, m_removeCollectionCol, missingCells);
                if (m_enableHilite) {
                    // create the hilite entry
                    final Set<RowKey> keys = new HashSet<RowKey>(1);
                    keys.add(row.getKey());
                    hiliteMapping.put(row.getKey(), keys);
                }
                out.push(newRow);
            }
            continue;
        }
        // running suffix for the keys of the rows created from this input row
        long counter = 1;
        // keys of the created rows; only collected if hiliting is enabled
        final Set<RowKey> keys;
        if (m_enableHilite) {
            keys = new HashSet<RowKey>();
        } else {
            keys = null;
        }
        boolean continueLoop = false;
        // true until an iterator yields an element or a null iterator is seen; never reset within this row
        boolean allEmpty = true;
        do {
            // reset the loop flag
            allMissing = true;
            continueLoop = false;
            final DataCell[] newCells = new DataCell[iterators.length];
            for (int i = 0, length = iterators.length; i < length; i++) {
                Iterator<DataCell> iterator = iterators[i];
                DataCell newCell;
                if (iterator != null && iterator.hasNext()) {
                    // at least one collection still has elements: another pass is needed afterwards
                    allEmpty = false;
                    continueLoop = true;
                    newCell = iterator.next();
                } else {
                    if (iterator == null) {
                        allEmpty = false;
                    }
                    // exhausted or absent collection: fill the gap with a missing cell
                    newCell = DataType.getMissingCell();
                }
                if (!newCell.isMissing()) {
                    allMissing = false;
                }
                newCells[i] = newCell;
            }
            // no iterator delivered an element in this pass: stop without emitting a row,
            // except when allEmpty is still set (only empty collections), which falls through
            // and emits exactly one row of missing cells
            if (!allEmpty && !continueLoop) {
                break;
            }
            // every delivered element was a missing cell: drop the row if missing values are skipped
            // (continue jumps to the loop condition, so the remaining elements are still processed)
            if (!allEmpty && allMissing && m_skipMissingValues) {
                continue;
            }
            final RowKey oldKey = row.getKey();
            // new key = original key + "_" + running counter
            final RowKey newKey = new RowKey(oldKey.getString() + "_" + counter++);
            final DefaultRow newRow = createClone(newKey, row, m_colIndices, m_removeCollectionCol, newCells);
            out.push(newRow);
            if (keys != null) {
                keys.add(newKey);
            }
        } while (continueLoop);
        // register the hilite mapping only if at least one row has been created
        if (keys != null && !keys.isEmpty()) {
            hiliteMapping.put(row.getKey(), keys);
        }
    }
    if (m_enableHilite) {
        // hand the collected key mapping over to the translator
        trans.setMapper(new DefaultHiLiteMapper(hiliteMapping));
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataRow(org.knime.core.data.DataRow) Iterator(java.util.Iterator) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue) HashSet(java.util.HashSet)

Example 3 with CollectionDataValue

use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.

the class CellReplacerNodeModel method createColumnRearranger.

/**
 * Creates the rearranger that replaces the target column (or appends a new column) with values
 * looked up in the dictionary table.
 *
 * <p>The dictionary is read into memory lazily, on the first call of the cell factory. If the
 * dictionary input column is a collection column, every element of a collection becomes a
 * search key for the row's output value.
 *
 * @param spec the spec of the table containing the target column
 * @param dictSpec the spec of the dictionary table
 * @param dictTable the dictionary table; only read when the first cell is computed
 * @param dictionaryInitExec monitor for progress and cancellation while the dictionary is read
 * @return the rearranger that appends or replaces the column, depending on the settings
 * @throws InvalidSettingsException if no target column is selected, one of the configured
 *             columns does not exist in its table, or no name is given for the appended column
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec spec, final DataTableSpec dictSpec, final BufferedDataTable dictTable, final ExecutionMonitor dictionaryInitExec) throws InvalidSettingsException {
    String targetCol = m_targetColModel.getStringValue();
    if (targetCol == null || targetCol.length() == 0) {
        throw new InvalidSettingsException("No target column selected");
    }
    final int targetColIndex = spec.findColumnIndex(targetCol);
    if (targetColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + targetCol + "\"");
    }
    final DataColumnSpec targetColSpec = spec.getColumnSpec(targetColIndex);
    // dictionary lookup (input) column: either the row ID or an existing column
    final int dictInputColIndex = dictSpec.findColumnIndex(m_dictInputColModel.getStringValue());
    final boolean dictInputIsCollection;
    if (m_dictInputColModel.useRowID()) {
        dictInputIsCollection = false;
    } else if (dictInputColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + m_dictInputColModel.getStringValue() + "\"");
    } else {
        DataColumnSpec inS = dictSpec.getColumnSpec(dictInputColIndex);
        dictInputIsCollection = inS.getType().isCollectionType();
    }
    // dictionary replacement (output) column: row IDs are delivered as strings
    final int dictOutputColIndex = dictSpec.findColumnIndex(m_dictOutputColModel.getStringValue());
    final DataType dictOutputColType;
    if (m_dictOutputColModel.useRowID()) {
        dictOutputColType = StringCell.TYPE;
    } else {
        if (dictOutputColIndex < 0) {
            throw new InvalidSettingsException("No such column \"" + m_dictOutputColModel.getStringValue() + "\"");
        }
        dictOutputColType = dictSpec.getColumnSpec(dictOutputColIndex).getType();
    }
    final NoMatchPolicy noMatchPolicy = getNoMatchPolicy();
    DataType outputType;
    switch(noMatchPolicy) {
        case Input:
            // unmatched cells are passed through, so the result column must also hold the input type
            outputType = DataType.getCommonSuperType(dictOutputColType, targetColSpec.getType());
            break;
        default:
            outputType = dictOutputColType;
    }
    String newColName;
    if (m_appendColumnModel.getBooleanValue()) {
        String newName = m_appendColumnNameModel.getStringValue();
        if (newName == null || newName.length() == 0) {
            throw new InvalidSettingsException("No new column name given");
        }
        newColName = DataTableSpec.getUniqueColumnName(spec, newName);
    } else {
        newColName = targetColSpec.getName();
    }
    DataColumnSpecCreator replaceSpecCreator = new DataColumnSpecCreator(newColName, outputType);
    CellFactory c = new SingleCellFactory(replaceSpecCreator.createSpec()) {

        /** Search key to replacement value; created on first use. */
        private Map<DataCell, DataCell> m_dictionaryMap;

        @Override
        public DataCell getCell(final DataRow row) {
            try {
                ensureInitDictionaryMap();
            } catch (CanceledExecutionException e) {
                // cancellation done by the framework
                return DataType.getMissingCell();
            }
            DataCell cell = row.getCell(targetColIndex);
            DataCell output = m_dictionaryMap.get(cell);
            if (output == null) {
                // no dictionary entry for this cell: apply the no-match policy
                switch(noMatchPolicy) {
                    case Input:
                        return cell;
                    default:
                        return DataType.getMissingCell();
                }
            }
            return output;
        }

        /** Reads the dictionary table into the map unless that has been done already. */
        private void ensureInitDictionaryMap() throws CanceledExecutionException {
            if (m_dictionaryMap == null) {
                m_dictionaryMap = new HashMap<DataCell, DataCell>();
                // long counter: the table size is a long, an int counter could overflow
                long i = 0;
                double rowCount = dictTable.size();
                for (DataRow r : dictTable) {
                    dictionaryInitExec.setProgress((i++) / rowCount, "Reading dictionary into memory, row " + i);
                    dictionaryInitExec.checkCanceled();
                    // a negative index means the column was not found in the dictionary spec;
                    // the row ID is used as the value in that case
                    DataCell output = dictOutputColIndex < 0 ? new StringCell(r.getKey().getString()) : r.getCell(dictOutputColIndex);
                    DataCell input = dictInputColIndex < 0 ? new StringCell(r.getKey().getString()) : r.getCell(dictInputColIndex);
                    if (!input.isMissing() && dictInputIsCollection) {
                        // each element of the collection is a search key of its own
                        CollectionDataValue v = (CollectionDataValue) input;
                        for (DataCell element : v) {
                            addSearchPair(element, output);
                        }
                    } else {
                        // plain cells and missing cells are registered as they are
                        addSearchPair(input, output);
                    }
                }
            }
        }

        /** Registers a search key; warns if the key was already present (the later entry wins). */
        private void addSearchPair(final DataCell input, final DataCell output) {
            if (m_dictionaryMap.put(input, output) != null) {
                setWarningMessage("Duplicate search key \"" + input + "\"");
            }
        }
    };
    ColumnRearranger result = new ColumnRearranger(spec);
    if (m_appendColumnModel.getBooleanValue()) {
        result.append(c);
    } else {
        result.replace(c, targetColIndex);
    }
    return result;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) StringCell(org.knime.core.data.def.StringCell) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) HashMap(java.util.HashMap) Map(java.util.Map) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Example 4 with CollectionDataValue

use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.

the class MissingValueRowFilter method matches.

/**
 * {@inheritDoc}
 *
 * <p>The cell of the filter column is tested directly; if that fails, deep filtering is
 * enabled and the cell is a collection, the collection is tested via deep filtering. The
 * row is accepted if the outcome agrees with the include setting.
 */
@Override
public boolean matches(final DataRow row, final long rowIndex) throws EndOfTableException, IncludeFromNowOn {
    // sanity check: a valid column index is expected here
    assert getColIdx() >= 0;
    final DataCell cell = row.getCell(getColIdx());
    // short-circuit: deep filtering is only attempted when the direct test fails
    final boolean hit = matches(cell)
        || (getDeepFiltering() && (cell instanceof CollectionDataValue)
            && performDeepFiltering((CollectionDataValue) cell));
    // include mode keeps matching rows, exclude mode keeps the others
    return getInclude() == hit;
}
Also used : DataCell(org.knime.core.data.DataCell) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Example 5 with CollectionDataValue

use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.

the class RangeRowFilter method matches.

/**
 * {@inheritDoc}
 *
 * <p>Missing cells never match. Collection cells are tested via deep filtering if it is
 * enabled; all other cells are tested directly. The row is accepted if the outcome agrees
 * with the include setting.
 */
@Override
public boolean matches(final DataRow row, final long rowIndex) throws EndOfTableException, IncludeFromNowOn {
    // sanity checks: a valid column index and a comparator are expected here
    assert getColIdx() >= 0;
    assert m_comparator != null;
    final DataCell cell = row.getCell(getColIdx());
    final boolean hit;
    if (cell.isMissing()) {
        hit = false;
    } else if (getDeepFiltering() && (cell instanceof CollectionDataValue)) {
        hit = performDeepFiltering((CollectionDataValue) cell);
    } else {
        hit = matches(cell);
    }
    // include mode keeps matching rows, exclude mode keeps the others
    return getInclude() == hit;
}
Also used : DataCell(org.knime.core.data.DataCell) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Aggregations

CollectionDataValue (org.knime.core.data.collection.CollectionDataValue): 19, DataCell (org.knime.core.data.DataCell): 18, DataRow (org.knime.core.data.DataRow): 9, HashMap (java.util.HashMap): 7, DataColumnSpec (org.knime.core.data.DataColumnSpec): 5, DataType (org.knime.core.data.DataType): 5, HashSet (java.util.HashSet): 4, ArrayList (java.util.ArrayList): 3, Map (java.util.Map): 3, DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 3, DataTableSpec (org.knime.core.data.DataTableSpec): 3, RowKey (org.knime.core.data.RowKey): 3, DefaultRow (org.knime.core.data.def.DefaultRow): 3, Iterator (java.util.Iterator): 2, LinkedHashMap (java.util.LinkedHashMap): 2, Set (java.util.Set): 2, StringCell (org.knime.core.data.def.StringCell): 2, ExecutionMonitor (org.knime.core.node.ExecutionMonitor): 2, InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 2, SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString): 2