Search in sources :

Example 6 with CollectionDataValue

use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.

the class SubsetMatcherNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Builds the sorted subset matchers from the first input table and then matches every collection of the
 * second input table against them, one thread pool job per set. Rows of the second table whose set cell is not a
 * collection or is an empty collection are skipped and counted.</p>
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // port 0 delivers the subsets to search for
    final BufferedDataTable subsets = inData[0];
    final DataTableSpec subsetSpec = subsets.getSpec();
    final int subsetIdx = subsetSpec.findColumnIndex(m_subsetCol.getStringValue());
    // subsets and sets have to be ordered by the very same comparator
    final Comparator<DataCell> itemOrder = subsetSpec.getColumnSpec(subsetIdx).getType().getComparator();

    // port 1 delivers the sets that are searched
    final BufferedDataTable sets = inData[1];
    final DataTableSpec setSpec = sets.getSpec();
    // a negative id index means "use the row key as set id"
    final boolean idFromRowKey = m_setIDCol.useRowID();
    final int idIdx = idFromRowKey ? -1 : setSpec.findColumnIndex(m_setIDCol.getStringValue());
    final DataColumnSpec idColSpec = idFromRowKey ? null : setSpec.getColumnSpec(idIdx);
    final int setIdx = setSpec.findColumnIndex(m_setCol.getStringValue());
    final boolean withSetColumn = m_appendSetListCol.getBooleanValue();

    // the result container is created up front so that the early exits can return an empty table
    final DataTableSpec outSpec =
        createTableSpec(idColSpec, setSpec.getColumnSpec(setIdx), subsetSpec.getColumnSpec(subsetIdx), withSetColumn);
    m_dc = exec.createDataContainer(outSpec);

    final long subsetRowCount = subsets.size();
    if (subsetRowCount == 0) {
        setWarningMessage("Empty subset table found");
        m_dc.close();
        return new BufferedDataTable[] { m_dc.getTable() };
    }
    final long setRowCount = sets.size();
    if (setRowCount == 0) {
        setWarningMessage("Empty set table found");
        m_dc.close();
        return new BufferedDataTable[] { m_dc.getTable() };
    }

    // progress is split between building the matchers and processing the (weighted) sets
    final double totalRowCount = subsetRowCount + setRowCount * SET_PROCESSING_FACTOR;
    final ExecutionMonitor matcherProgress = exec.createSubProgress(subsetRowCount / totalRowCount);
    exec.setMessage("Generating subset base...");
    final SubsetMatcher[] matchers = createSortedMatcher(matcherProgress, subsets, subsetIdx, itemOrder);
    matcherProgress.setProgress(1.0);
    if (matchers.length < 1) {
        setWarningMessage("No item sets found");
        m_dc.close();
        return new BufferedDataTable[] { m_dc.getTable() };
    }

    final ExecutionMonitor setProgress = exec.createSubProgress((setRowCount * SET_PROCESSING_FACTOR) / totalRowCount);
    exec.setMessage("Processing sets... ");
    // sub pool of the global thread pool that runs the per-set matching jobs
    final ThreadPool workers = KNIMEConstants.GLOBAL_THREAD_POOL.createSubPool(1);
    for (final DataRow setRow : sets) {
        exec.checkCanceled();
        final DataCell idCell = idIdx < 0 ? new StringCell(setRow.getKey().getString()) : setRow.getCell(idIdx);
        final DataCell candidate = setRow.getCell(setIdx);
        // neither non-collection cells nor empty collections can contain a subset
        if (!(candidate instanceof CollectionDataValue) || ((CollectionDataValue) candidate).size() < 1) {
            setProgress.setProgress(m_setCounter.incrementAndGet() / (double) setRowCount);
            m_skipCounter.incrementAndGet();
            continue;
        }
        final CollectionDataValue items = (CollectionDataValue) candidate;
        // one job per set
        workers.enqueue(createRunnable(setProgress, setRowCount, idCell, items, withSetColumn, itemOrder, matchers,
            m_maxMismatches.getIntValue()));
    }
    // the container must not be closed before every job has finished
    workers.waitForTermination();

    exec.setMessage("Creating data table...");
    m_dc.close();
    if (m_skipCounter.intValue() > 0) {
        setWarningMessage("No matching subsets found for " + m_skipCounter + " out of " + setRowCount + " sets");
    }
    exec.setProgress(1.0);
    return new BufferedDataTable[] { m_dc.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) RowKey(org.knime.core.data.RowKey) ThreadPool(org.knime.core.util.ThreadPool) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) StringCell(org.knime.core.data.def.StringCell) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Example 7 with CollectionDataValue

use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.

the class UngroupOperation method compute.

/**
 * Performs the ungroup operation on the given row input and pushes the result to the row output.
 *
 * <p>For every input row the cells of the selected collection columns are expanded in parallel: the i-th output
 * row holds the i-th element of each collection, padded with missing cells for collections that are shorter or
 * for cells that are not collections. Expanded rows get the key <code>&lt;original key&gt;_&lt;n&gt;</code> with
 * <code>n</code> starting at 1. A row without any collection cell in the selected columns is either passed on with
 * missing cells under its original key or dropped, depending on the skip missing values option.</p>
 *
 * @param in the row input, will NOT be closed when finished
 * @param out the row output, will NOT be closed when finished
 * @param exec the execution context to check cancellation and (optional) progress logging
 * @param rowCount row count to track the progress or <code>-1</code> without progress tracking
 * @throws Exception the thrown exception, e.g. raised by the cancellation check or by the row input/output
 * @since 3.2
 */
public void compute(final RowInput in, final RowOutput out, final ExecutionContext exec, final long rowCount) throws Exception {
    // maps each input row key to the keys of the output rows created from it (only filled if hiliting is enabled)
    final Map<RowKey, Set<RowKey>> hiliteMapping = new HashMap<RowKey, Set<RowKey>>();
    // one iterator per selected collection column, re-filled for every input row
    @SuppressWarnings("unchecked") Iterator<DataCell>[] iterators = new Iterator[m_colIndices.length];
    // replacement cells used when a row has no collection cell at all
    final DataCell[] missingCells = new DataCell[m_colIndices.length];
    Arrays.fill(missingCells, DataType.getMissingCell());
    long rowCounter = 0;
    DataRow row = null;
    while ((row = in.poll()) != null) {
        rowCounter++;
        exec.checkCanceled();
        // progress is only reported if the total row count is known
        if (rowCount > 0) {
            exec.setProgress(rowCounter / (double) rowCount, "Processing row " + rowCounter + " of " + rowCount);
        }
        // stays true if none of the selected cells is a collection
        boolean allMissing = true;
        for (int i = 0, length = m_colIndices.length; i < length; i++) {
            final DataCell cell = row.getCell(m_colIndices[i]);
            final CollectionDataValue listCell;
            final Iterator<DataCell> iterator;
            if (cell instanceof CollectionDataValue) {
                listCell = (CollectionDataValue) cell;
                iterator = listCell.iterator();
                allMissing = false;
            } else {
                // non-collection cells (e.g. missing cells) are marked with a null iterator
                iterator = null;
            }
            iterators[i] = iterator;
        }
        if (allMissing) {
            // none of the selected cells is a collection: pass the row on (under its original key)
            // with missing cells as well if the skip missing value option is disabled
            if (!m_skipMissingValues) {
                final DefaultRow newRow = createClone(row.getKey(), row, m_colIndices, m_removeCollectionCol, missingCells);
                if (m_enableHilite) {
                    // create the hilite entry
                    final Set<RowKey> keys = new HashSet<RowKey>(1);
                    keys.add(row.getKey());
                    hiliteMapping.put(row.getKey(), keys);
                }
                out.push(newRow);
            }
            continue;
        }
        // suffix of the next output row key created for this input row
        long counter = 1;
        // keys of the output rows created for this input row; null if hiliting is disabled
        final Set<RowKey> keys;
        if (m_enableHilite) {
            keys = new HashSet<RowKey>();
        } else {
            keys = null;
        }
        // true as long as at least one iterator delivered an element in the current pass
        boolean continueLoop = false;
        // only stays true if every selected cell is a collection and all of them are empty
        boolean allEmpty = true;
        do {
            // reset the loop flag
            allMissing = true;
            continueLoop = false;
            final DataCell[] newCells = new DataCell[iterators.length];
            for (int i = 0, length = iterators.length; i < length; i++) {
                Iterator<DataCell> iterator = iterators[i];
                DataCell newCell;
                if (iterator != null && iterator.hasNext()) {
                    allEmpty = false;
                    continueLoop = true;
                    newCell = iterator.next();
                } else {
                    if (iterator == null) {
                        // a non-collection cell means that not all selected cells are empty collections
                        allEmpty = false;
                    }
                    // pad exhausted collections and non-collection cells with a missing cell
                    newCell = DataType.getMissingCell();
                }
                if (!newCell.isMissing()) {
                    allMissing = false;
                }
                newCells[i] = newCell;
            }
            if (!allEmpty && !continueLoop) {
                // all iterators are exhausted, nothing left to output for this input row
                break;
            }
            if (!allEmpty && allMissing && m_skipMissingValues) {
                // drop output rows that consist of missing values only; the loop condition is still evaluated
                continue;
            }
            // reached with allEmpty == true as well: all-empty collections yield a single row of missing cells
            final RowKey oldKey = row.getKey();
            final RowKey newKey = new RowKey(oldKey.getString() + "_" + counter++);
            final DefaultRow newRow = createClone(newKey, row, m_colIndices, m_removeCollectionCol, newCells);
            out.push(newRow);
            if (keys != null) {
                keys.add(newKey);
            }
        } while (continueLoop);
        if (keys != null && !keys.isEmpty()) {
            hiliteMapping.put(row.getKey(), keys);
        }
    }
    if (m_enableHilite) {
        // publish the collected key mapping to the hilite translator
        m_trans.setMapper(new DefaultHiLiteMapper(hiliteMapping));
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataRow(org.knime.core.data.DataRow) Iterator(java.util.Iterator) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue) HashSet(java.util.HashSet)

Example 8 with CollectionDataValue

use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.

the class SubsetMatcherNodeModel method createSortedMatcher.

/**
 * Creates the sorted root matchers for all item sets found in the given column of the subset table.
 *
 * <p>Every non-empty collection cell is converted into a sorted item array. Item sets that share the same first
 * item are attached to the same root {@link SubsetMatcher}. Rows whose cell is not a collection (e.g. missing
 * cells) or is an empty collection are ignored, but still count towards the reported progress.</p>
 *
 * @param exec the monitor used for cancellation checks and progress reporting
 * @param table the table that contains the subsets
 * @param colIdx the index of the collection column that holds the subsets
 * @param comparator the comparator used to sort the items of each subset
 * @return the sorted root matchers, an empty array if the table is empty or contains no usable item set
 * @throws CanceledExecutionException if the execution has been canceled
 */
private SubsetMatcher[] createSortedMatcher(final ExecutionMonitor exec, final BufferedDataTable table, final int colIdx, final Comparator<DataCell> comparator) throws CanceledExecutionException {
    final long rowCount = table.size();
    if (rowCount < 1) {
        return new SubsetMatcher[0];
    }
    // root matchers keyed by the first item of the sorted item set
    final Map<DataCell, SubsetMatcher> map = new HashMap<>();
    long counter = 0;
    for (final DataRow row : table) {
        exec.checkCanceled();
        // count every row, including the ones skipped below, so that the progress reflects the actual
        // position in the table and reaches rowCount at the last row
        counter++;
        exec.setProgress(counter / (double) rowCount, "Processing subset " + counter + " of " + rowCount);
        final DataCell cell = row.getCell(colIdx);
        if (!(cell instanceof CollectionDataValue)) {
            // skip missing cells and non-collection cells
            continue;
        }
        final CollectionDataValue collectionCell = (CollectionDataValue) cell;
        if (collectionCell.size() <= 0) {
            // skip empty collections
            continue;
        }
        final DataCell[] itemSet = collectionCell2SortedArray(collectionCell, comparator);
        final DataCell rootItem = itemSet[0];
        SubsetMatcher matcher = map.get(rootItem);
        if (matcher == null) {
            matcher = new SubsetMatcher(rootItem, comparator);
            map.put(rootItem, matcher);
        }
        // the root item is represented by the matcher itself, the remaining items start at index 1
        matcher.appendChildMatcher(itemSet, 1);
    }
    final ArrayList<SubsetMatcher> matchers = new ArrayList<>(map.values());
    Collections.sort(matchers);
    return matchers.toArray(new SubsetMatcher[0]);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Example 9 with CollectionDataValue

use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.

the class StringCompareRowFilter method matches.

/**
 * {@inheritDoc}
 *
 * <p>Missing cells never match. Collection cells are tested element-wise if deep filtering is enabled, all
 * other cells are tested directly. The result is inverted if the filter is in exclude mode.</p>
 */
@Override
public boolean matches(final DataRow row, final long rowIndex) throws EndOfTableException, IncludeFromNowOn {
    // the filter column has to be resolved before rows are tested
    assert getColIdx() >= 0;
    final DataCell candidate = row.getCell(getColIdx());
    final boolean hit;
    if (candidate.isMissing()) {
        hit = false;
    } else if (getDeepFiltering() && candidate instanceof CollectionDataValue) {
        hit = performDeepFiltering((CollectionDataValue) candidate);
    } else {
        hit = matches(candidate);
    }
    // include mode keeps the hits, exclude mode keeps everything else
    return getInclude() ? hit : !hit;
}
Also used : DataCell(org.knime.core.data.DataCell) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Example 10 with CollectionDataValue

use of org.knime.core.data.collection.CollectionDataValue in project knime-core by knime.

the class Collection2BitVectorCellFactory method getCell.

/**
 * {@inheritDoc}
 *
 * <p>Converts the collection cell of the configured column into a bit vector cell: for every non-missing
 * element the bit registered for the element's string representation is set. A missing cell is returned if the
 * input cell is missing, is not a collection, or contains an element without a registered bit index.</p>
 *
 * <p>The set/not-set bit statistics count the distinct bits that were actually set. Missing elements and
 * duplicate elements of the collection therefore do not inflate the number of set bits.</p>
 */
@Override
public DataCell getCell(final DataRow row) {
    incrementNrOfRows();
    final DataCell cell = row.getCell(getColumnIndex());
    if (cell.isMissing()) {
        return DataType.getMissingCell();
    }
    if (!(cell instanceof CollectionDataValue)) {
        printError(LOGGER, row, "Incompatible type found");
        return DataType.getMissingCell();
    }
    final int nrOfBits = m_idxMap.size();
    final org.knime.core.data.vector.bitvector.BitVectorCellFactory<? extends DataCell> factory = getVectorType().getCellFactory(nrOfBits);
    // tracks the distinct indices set in the factory; the collection size is not reliable for that because
    // missing elements are skipped and duplicate elements map to the same bit
    final java.util.BitSet setBits = new java.util.BitSet(nrOfBits);
    final CollectionDataValue collCell = (CollectionDataValue) cell;
    for (final DataCell valCell : collCell) {
        if (valCell.isMissing()) {
            continue;
        }
        final Integer bitIdx = m_idxMap.get(valCell.toString());
        if (bitIdx == null) {
            printError(LOGGER, row, "No bit index found for cell " + valCell.toString());
            return DataType.getMissingCell();
        }
        final int idx = bitIdx.intValue();
        factory.set(idx);
        setBits.set(idx);
    }
    final int nrOfSetBits = setBits.cardinality();
    m_nrOfSetBits += nrOfSetBits;
    m_nrOfNotSetBits += nrOfBits - nrOfSetBits;
    return factory.createDataCell();
}
Also used : DataCell(org.knime.core.data.DataCell) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Aggregations

CollectionDataValue (org.knime.core.data.collection.CollectionDataValue)19 DataCell (org.knime.core.data.DataCell)18 DataRow (org.knime.core.data.DataRow)9 HashMap (java.util.HashMap)7 DataColumnSpec (org.knime.core.data.DataColumnSpec)5 DataType (org.knime.core.data.DataType)5 HashSet (java.util.HashSet)4 ArrayList (java.util.ArrayList)3 Map (java.util.Map)3 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)3 DataTableSpec (org.knime.core.data.DataTableSpec)3 RowKey (org.knime.core.data.RowKey)3 DefaultRow (org.knime.core.data.def.DefaultRow)3 Iterator (java.util.Iterator)2 LinkedHashMap (java.util.LinkedHashMap)2 Set (java.util.Set)2 StringCell (org.knime.core.data.def.StringCell)2 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)2 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)2 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)2