use of org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject in project knime-core by knime.
the class RearrangeColumnsTable method calcNewColsASynchronously.
/**
 * Computes the new columns for the rows of {@code table} by delegating to a
 * {@link ConcurrentNewColCalculator}.
 *
 * <p>The worker count and queue size passed to the calculator are the minima of
 * {@code getMaxParallelWorkers()} and {@code getMaxQueueSize()} over all factories
 * listed by the mapping. Every factory must be an {@link AbstractCellFactory}.
 *
 * @param table the input table handed to the calculator
 * @param subProgress the progress monitor handed to the calculator
 * @param newColsProducerMapping supplies the list of column producers and their factories
 * @param container handed to the calculator (presumably the sink for the new cells; confirm
 *            against {@code ConcurrentNewColCalculator})
 * @throws CanceledExecutionException if the calculator run is interrupted; the
 *             {@link InterruptedException} is attached as cause
 * @throws IllegalStateException if a factory is not an {@link AbstractCellFactory}
 */
private static void calcNewColsASynchronously(final BufferedDataTable table, final ExecutionMonitor subProgress, final NewColumnsProducerMapping newColsProducerMapping, final DataContainer container) throws CanceledExecutionException {
    final long totalRows = table.size();
    int maxWorkers = Integer.MAX_VALUE;
    int maxQueue = Integer.MAX_VALUE;
    CellFactory progressFactory = null;
    for (SpecAndFactoryObject producer : newColsProducerMapping.getAllNewColumnsList()) {
        final CellFactory factory = producer.getFactory();
        if (!(factory instanceof AbstractCellFactory)) {
            throw new IllegalStateException("Coding problem: This method" + " should not have been called as the cell factories do not allow parallel processing");
        }
        final AbstractCellFactory parallelFactory = (AbstractCellFactory) factory;
        maxWorkers = Math.min(maxWorkers, parallelFactory.getMaxParallelWorkers());
        maxQueue = Math.min(maxQueue, parallelFactory.getMaxQueueSize());
        // The first factory is taken as a default; any later producer that is not
        // flagged as a new column replaces it.
        if (progressFactory == null || !producer.isNewColumn()) {
            progressFactory = factory;
        }
    }
    assert progressFactory != null;
    assert maxWorkers > 0 : "Nr workers <= 0: " + maxWorkers;
    assert maxQueue > 0 : "queue size <= 0: " + maxQueue;
    final ConcurrentNewColCalculator calculator = new ConcurrentNewColCalculator(maxQueue, maxWorkers, container, subProgress, totalRows, newColsProducerMapping, progressFactory);
    try {
        calculator.run(table);
    } catch (InterruptedException interrupted) {
        // NOTE(review): the thread's interrupt flag is not re-asserted here; confirm
        // that callers do not rely on it.
        final CanceledExecutionException canceled = new CanceledExecutionException(interrupted.getMessage());
        canceled.initCause(interrupted);
        throw canceled;
    } catch (ExecutionException failed) {
        // Unwrap the worker failure; fall back to the wrapper if it carries no cause.
        final Throwable nested = failed.getCause();
        final Throwable root = (nested == null) ? failed : nested;
        if (root instanceof RuntimeException) {
            throw (RuntimeException) root;
        }
        throw new RuntimeException(root);
    }
}
use of org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject in project knime-core by knime.
the class RearrangeColumnsTable method getUniqueProducerFactories.
/**
 * Collects the distinct cell factories referenced by the argument collection.
 * Factories are told apart by reference identity (an {@link IdentityHashMap} is
 * used), not by {@code equals}.
 *
 * @param facs The column descriptors to scan (one entry per newly created column)
 * @return The key set of the identity map, containing each factory instance once
 *         (in most cases just a single factory)
 */
static Collection<CellFactory> getUniqueProducerFactories(final Collection<SpecAndFactoryObject> facs) {
    final IdentityHashMap<CellFactory, Object> distinctFactories = new IdentityHashMap<CellFactory, Object>();
    for (SpecAndFactoryObject descriptor : facs) {
        // only the keys matter; the map is used as an identity-based set
        distinctFactories.put(descriptor.getFactory(), null);
    }
    return distinctFactories.keySet();
}
use of org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject in project knime-core by knime.
the class RearrangeColumnsTable method applyDataTypeConverters.
/**
 * Used when {@link ColumnRearranger#ensureColumnIsConverted(DataCellTypeConverter, int)} is called. It
 * preprocesses the row and replaces each column to be converted by the result of its converter.
 *
 * @param row The original input row.
 * @param producerMap The object having the converter list (possibly empty).
 * @param newCells Output array; each converted cell is also stored here at the index paired with
 *            its converter.
 * @return The input row itself if no converter is registered, otherwise a new
 *         {@link BlobSupportDataRow} with the same key and the converted cells substituted.
 */
private static DataRow applyDataTypeConverters(final DataRow row, final NewColumnsProducerMapping producerMap, final DataCell[] newCells) {
    final List<Pair<SpecAndFactoryObject, Integer>> conversions = producerMap.getConverterToIndexMap();
    if (conversions.isEmpty()) {
        return row;
    }
    // Copy all cells of the input row; for blob-support rows the raw cell is taken.
    final int cellCount = row.getNumCells();
    final DataCell[] patchedCells = new DataCell[cellCount];
    for (int col = 0; col < cellCount; col++) {
        if (row instanceof BlobSupportDataRow) {
            patchedCells[col] = ((BlobSupportDataRow) row).getRawCell(col);
        } else {
            patchedCells[col] = row.getCell(col);
        }
    }
    for (Pair<SpecAndFactoryObject, Integer> conversion : conversions) {
        final SpecAndFactoryObject spec = conversion.getFirst();
        final DataCellTypeConverter converter = spec.getConverter();
        final int sourceColumn = spec.getConverterIndex();
        final Integer targetSlot = conversion.getSecond();
        // the converter always sees the cell of the original row
        final DataCell converted = converter.callConvert(row.getCell(sourceColumn));
        newCells[targetSlot] = converted;
        patchedCells[sourceColumn] = converted;
    }
    return new BlobSupportDataRow(row.getKey(), patchedCells);
}
use of org.knime.core.data.container.ColumnRearranger.SpecAndFactoryObject in project knime-core by knime.
the class RearrangeColumnsTable method create.
/**
 * Factory method meant to be called right before the {@link BufferedDataTable} is created.
 * New and converted columns are computed into a separate append table; all other columns
 * are referenced from the input table by index.
 *
 * @param rearranger The meta information how to assemble everything.
 * @param table The reference table.
 * @param subProgress The progress monitor for progress/cancel.
 * @param context Used for data container creation.
 * @return The newly created table.
 * @throws CanceledExecutionException If canceled.
 * @throws IllegalArgumentException If the structure of the table's spec differs from the
 *             rearranger's original spec.
 */
public static RearrangeColumnsTable create(final ColumnRearranger rearranger, final BufferedDataTable table, final ExecutionMonitor subProgress, final ExecutionContext context) throws CanceledExecutionException {
    final DataTableSpec referenceSpec = rearranger.getOriginalSpec();
    final Vector<SpecAndFactoryObject> includes = rearranger.getIncludes();
    // column names and types of both specs have to agree
    if (!table.getDataTableSpec().equalStructure(referenceSpec)) {
        throw new IllegalArgumentException("The argument table's spec does not match the original " + "spec passed in the constructor.");
    }
    final int includeCount = includes.size();
    // Only the producers of appended/inserted columns; usually far fewer entries
    // than the full include list.
    final NewColumnsProducerMapping newColsProducerMapping = createNewColumnsProducerMapping(includes);
    final List<SpecAndFactoryObject> producers = newColsProducerMapping.getAllNewColumnsList();

    // Smallest worker limit over all factories. A factory that is not an
    // AbstractCellFactory sets it to -1; any value <= 0 selects sequential processing.
    final ArrayList<DataColumnSpec> producedSpecs = new ArrayList<DataColumnSpec>();
    int workerCount = Integer.MAX_VALUE;
    for (SpecAndFactoryObject producer : producers) {
        final CellFactory factory = producer.getFactory();
        workerCount = (factory instanceof AbstractCellFactory)
            ? Math.min(workerCount, ((AbstractCellFactory) factory).getMaxParallelWorkers())
            : -1;
        producedSpecs.add(producer.getColSpec());
    }
    initProcessing(newColsProducerMapping, context);

    final int newColCount = producedSpecs.size();
    ContainerTable appendTable = null;
    DataTableSpec appendTableSpec;
    if (newColCount == 0) {
        // No new columns: nothing is written, so no append table is created.
        appendTableSpec = new DataTableSpec();
    } else {
        final DataColumnSpec[] newColSpecs = producedSpecs.toArray(new DataColumnSpec[newColCount]);
        final DataContainer container = context.createDataContainer(new DataTableSpec(newColSpecs));
        container.setBufferCreator(new NoKeyBufferCreator());
        assert producers.size() == newColCount;
        try {
            if (workerCount > 0) {
                calcNewColsASynchronously(table, subProgress, newColsProducerMapping, container);
            } else {
                calcNewColsSynchronously(table, subProgress, newColsProducerMapping, container);
            }
        } finally {
            container.close();
            finishProcessing(newColsProducerMapping);
        }
        appendTable = container.getBufferedTable();
        appendTableSpec = appendTable.getDataTableSpec();
    }

    // Assemble the final spec column by column. rearranger.createSpec is not used
    // because it might lack the domain information.
    final boolean[] isFromRefTable = new boolean[includeCount];
    final int[] includesIndex = new int[includeCount];
    final DataColumnSpec[] colSpecs = new DataColumnSpec[includeCount];
    int nextAppendIndex = 0;
    for (int i = 0; i < includeCount; i++) {
        final SpecAndFactoryObject include = includes.get(i);
        // converted and new columns are both read from the append table
        final boolean fromAppendTable = include.isConvertedColumn() || include.isNewColumn();
        isFromRefTable[i] = !fromAppendTable;
        if (fromAppendTable) {
            includesIndex[i] = nextAppendIndex;
            colSpecs[i] = appendTableSpec.getColumnSpec(nextAppendIndex);
            nextAppendIndex++;
        } else {
            final int originalIndex = include.getOriginalIndex();
            includesIndex[i] = originalIndex;
            colSpecs[i] = referenceSpec.getColumnSpec(originalIndex);
        }
    }
    return new RearrangeColumnsTable(table, includesIndex, isFromRefTable, new DataTableSpec(colSpecs), appendTable);
}
Aggregations