Search in sources :

Example 1 with SpecAndFactoryObject

use of org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject in project knime-core by knime.

the class RearrangeColumnsTable method calcNewColsASynchronously.

/**
 * Computes the new columns by running a {@link ConcurrentNewColCalculator} over the input table.
 *
 * <p>The worker count and the queue size handed to the calculator are the minimum of the respective limits
 * reported by all participating {@link AbstractCellFactory} instances.
 *
 * @param table The input table; its size is passed on as the final row count.
 * @param subProgress The monitor handed to the calculator.
 * @param newColsProducerMapping Supplies the list of column producers whose factories are inspected.
 * @param container The container handed to the calculator.
 * @throws CanceledExecutionException If the calculation is interrupted; the {@link InterruptedException}
 *             is attached as cause.
 * @throws IllegalStateException If any producer's factory is not an {@link AbstractCellFactory}.
 */
private static void calcNewColsASynchronously(final BufferedDataTable table, final ExecutionMonitor subProgress, final NewColumnsProducerMapping newColsProducerMapping, final DataContainer container) throws CanceledExecutionException {
    final long finalRowCount = table.size();
    int maxWorkers = Integer.MAX_VALUE;
    int maxQueueSize = Integer.MAX_VALUE;
    CellFactory progressFactory = null;
    for (SpecAndFactoryObject producer : newColsProducerMapping.getAllNewColumnsList()) {
        final CellFactory factory = producer.getFactory();
        if (!(factory instanceof AbstractCellFactory)) {
            throw new IllegalStateException("Coding problem: This method" + " should not have been called as the cell factories do not allow parallel processing");
        }
        // the most restrictive factory determines the degree of parallelism
        final AbstractCellFactory parallelFactory = (AbstractCellFactory) factory;
        maxWorkers = Math.min(maxWorkers, parallelFactory.getMaxParallelWorkers());
        maxQueueSize = Math.min(maxQueueSize, parallelFactory.getMaxQueueSize());
        // the first factory serves as default; every producer that is not a new column replaces it
        if (!(progressFactory != null && producer.isNewColumn())) {
            progressFactory = factory;
        }
    }
    assert progressFactory != null;
    assert maxWorkers > 0 : "Nr workers <= 0: " + maxWorkers;
    assert maxQueueSize > 0 : "queue size <= 0: " + maxQueueSize;
    final ConcurrentNewColCalculator calculator = new ConcurrentNewColCalculator(maxQueueSize, maxWorkers, container, subProgress, finalRowCount, newColsProducerMapping, progressFactory);
    try {
        calculator.run(table);
    } catch (InterruptedException interrupted) {
        // an interrupt is reported to the caller as a cancellation
        final CanceledExecutionException canceled = new CanceledExecutionException(interrupted.getMessage());
        canceled.initCause(interrupted);
        throw canceled;
    } catch (ExecutionException failure) {
        // unwrap the underlying problem; fall back to the wrapper itself if no cause was recorded
        final Throwable wrapped = failure.getCause();
        final Throwable problem = wrapped != null ? wrapped : failure;
        throw problem instanceof RuntimeException ? (RuntimeException) problem : new RuntimeException(problem);
    }
}
Also used : CanceledExecutionException(org.knime.core.node.CanceledExecutionException) SpecAndFactoryObject(org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject) ExecutionException(java.util.concurrent.ExecutionException)

Example 2 with SpecAndFactoryObject

use of org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject in project knime-core by knime.

the class RearrangeColumnsTable method getUniqueProducerFactories.

/**
 * Collects the distinct cell factories referenced by the argument collection. Two factories are considered
 * the same only if they are the very same instance (identity comparison, not {@code equals}).
 *
 * @param facs The producers to inspect (length = number of newly created columns)
 * @return The distinct factory instances (in most cases just 1)
 */
static Collection<CellFactory> getUniqueProducerFactories(final Collection<SpecAndFactoryObject> facs) {
    // only the keys matter; the identity map is used as an identity-based set
    final IdentityHashMap<CellFactory, Object> distinctFactories = new IdentityHashMap<CellFactory, Object>();
    for (SpecAndFactoryObject producer : facs) {
        distinctFactories.put(producer.getFactory(), null);
    }
    return distinctFactories.keySet();
}
Also used : SpecAndFactoryObject(org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject) IdentityHashMap(java.util.IdentityHashMap)

Example 3 with SpecAndFactoryObject

use of org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject in project knime-core by knime.

the class RearrangeColumnsTable method applyDataTypeConverters.

/**
 * Used when {@link ColumnRearranger#ensureColumnIsConverted(DataCellTypeConverter, int)} is called. It
 * preprocesses the row and replaces each column to be converted by the result of its converter.
 *
 * @param row The original input row.
 * @param producerMap The object having the converter list (possibly empty).
 * @param newCells Output array; each converted cell is additionally stored here at the index that is paired
 *            with its converter.
 * @return The input row itself if there are no converters, otherwise a new {@link BlobSupportDataRow} with
 *         the same key in which the converted columns have been replaced.
 */
private static DataRow applyDataTypeConverters(final DataRow row, final NewColumnsProducerMapping producerMap, final DataCell[] newCells) {
    final List<Pair<SpecAndFactoryObject, Integer>> converterEntries = producerMap.getConverterToIndexMap();
    if (converterEntries.isEmpty()) {
        // nothing to convert - hand back the row untouched
        return row;
    }
    // copy the cells of the input row; blob-supporting rows contribute their raw cells
    final DataCell[] patchedCells = new DataCell[row.getNumCells()];
    final boolean hasRawCells = row instanceof BlobSupportDataRow;
    for (int col = 0; col < patchedCells.length; col++) {
        if (hasRawCells) {
            patchedCells[col] = ((BlobSupportDataRow) row).getRawCell(col);
        } else {
            patchedCells[col] = row.getCell(col);
        }
    }
    for (Pair<SpecAndFactoryObject, Integer> converterEntry : converterEntries) {
        final SpecAndFactoryObject conversion = converterEntry.getFirst();
        final DataCellTypeConverter converter = conversion.getConverter();
        final int sourceColumn = conversion.getConverterIndex();
        final Integer targetSlot = converterEntry.getSecond();
        // the converter always sees the cell of the original row, not a previously patched one
        final DataCell converted = converter.callConvert(row.getCell(sourceColumn));
        newCells[targetSlot] = converted;
        patchedCells[sourceColumn] = converted;
    }
    return new BlobSupportDataRow(row.getKey(), patchedCells);
}
Also used : DataCellTypeConverter(org.knime.core.data.DataCellTypeConverter) SpecAndFactoryObject(org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject) DataCell(org.knime.core.data.DataCell) Pair(org.knime.core.util.Pair)

Example 4 with SpecAndFactoryObject

use of org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject in project knime-core by knime.

the class RearrangeColumnsTable method create.

/**
 * Builds a {@link RearrangeColumnsTable} for the given rearranger and reference table. This factory method
 * is intended to be used immediately before the {@link BufferedDataTable} is created.
 *
 * <p>If the rearranger produces new (or converted) columns, their cells are computed here and written to a
 * separate container table; columns taken over from the reference table are only referenced by index.
 *
 * @param rearranger The meta information how to assemble everything.
 * @param table The reference table.
 * @param subProgress The progress monitor for progress/cancel.
 * @param context Used for data container creation.
 * @return The newly created table.
 * @throws CanceledExecutionException If canceled.
 * @throws IllegalArgumentException If the structure of the table's spec does not match the original spec of
 *             the rearranger.
 */
public static RearrangeColumnsTable create(final ColumnRearranger rearranger, final BufferedDataTable table, final ExecutionMonitor subProgress, final ExecutionContext context) throws CanceledExecutionException {
    final DataTableSpec originalSpec = rearranger.getOriginalSpec();
    final Vector<SpecAndFactoryObject> includes = rearranger.getIncludes();
    // names and types of the specs must match
    final boolean specMatches = table.getDataTableSpec().equalStructure(originalSpec);
    if (!specMatches) {
        throw new IllegalArgumentException("The argument table's spec does not match the original " + "spec passed in the constructor.");
    }
    final int size = includes.size();
    // the reduced set of SpecAndFactoryObject that models newly appended/inserted columns;
    // in most cases considerably smaller than the full list of includes
    final NewColumnsProducerMapping newColsProducerMapping = createNewColumnsProducerMapping(includes);
    final List<SpecAndFactoryObject> newColumnFactoryList = newColsProducerMapping.getAllNewColumnsList();
    final List<DataColumnSpec> newColSpecsList = new ArrayList<DataColumnSpec>();
    // since v2.5 the input may be processed concurrently; this is the minimum worker count over all
    // used factories, or a negative value as soon as one factory requires sequential processing
    int workerCount = Integer.MAX_VALUE;
    for (SpecAndFactoryObject producer : newColumnFactoryList) {
        final CellFactory factory = producer.getFactory();
        workerCount = factory instanceof AbstractCellFactory
            ? Math.min(workerCount, ((AbstractCellFactory) factory).getMaxParallelWorkers())
            : -1; // unknown factory - process sequentially
        newColSpecsList.add(producer.getColSpec());
    }
    initProcessing(newColsProducerMapping, context);
    final int newColCount = newColSpecsList.size();
    final ContainerTable appendTable;
    final DataTableSpec appendTableSpec;
    if (newColCount == 0) {
        // no new columns: nothing has to be computed or written, so no append table is needed
        appendTable = null;
        appendTableSpec = new DataTableSpec();
    } else {
        final DataColumnSpec[] newColSpecs = newColSpecsList.toArray(new DataColumnSpec[newColCount]);
        final DataContainer container = context.createDataContainer(new DataTableSpec(newColSpecs));
        container.setBufferCreator(new NoKeyBufferCreator());
        assert newColumnFactoryList.size() == newColCount;
        try {
            if (workerCount > 0) {
                calcNewColsASynchronously(table, subProgress, newColsProducerMapping, container);
            } else {
                calcNewColsSynchronously(table, subProgress, newColsProducerMapping, container);
            }
        } finally {
            container.close();
            finishProcessing(newColsProducerMapping);
        }
        appendTable = container.getBufferedTable();
        appendTableSpec = appendTable.getDataTableSpec();
    }
    // create the new spec. Do not use rearranger.createSpec because
    // that might lack the domain information!
    final boolean[] isFromRefTable = new boolean[size];
    final int[] includesIndex = new int[size];
    final DataColumnSpec[] colSpecs = new DataColumnSpec[size];
    int appendColIndex = 0;
    for (int i = 0; i < size; i++) {
        final SpecAndFactoryObject include = includes.get(i);
        // converted and new columns are both served from the append table, in order of appearance
        final boolean isProduced = include.isConvertedColumn() || include.isNewColumn();
        isFromRefTable[i] = !isProduced;
        if (isProduced) {
            includesIndex[i] = appendColIndex;
            colSpecs[i] = appendTableSpec.getColumnSpec(appendColIndex);
            appendColIndex++;
        } else {
            final int originalIndex = include.getOriginalIndex();
            includesIndex[i] = originalIndex;
            colSpecs[i] = originalSpec.getColumnSpec(originalIndex);
        }
    }
    return new RearrangeColumnsTable(table, includesIndex, isFromRefTable, new DataTableSpec(colSpecs), appendTable);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) DataColumnSpec(org.knime.core.data.DataColumnSpec) SpecAndFactoryObject(org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject)

Aggregations

SpecAndFactoryObject (org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject)4 ArrayList (java.util.ArrayList)1 IdentityHashMap (java.util.IdentityHashMap)1 ExecutionException (java.util.concurrent.ExecutionException)1 DataCell (org.knime.core.data.DataCell)1 DataCellTypeConverter (org.knime.core.data.DataCellTypeConverter)1 DataColumnSpec (org.knime.core.data.DataColumnSpec)1 DataTableSpec (org.knime.core.data.DataTableSpec)1 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)1 Pair (org.knime.core.util.Pair)1