Use of org.knime.core.util.ThreadPool in the project knime-core by knime.
Class SubsetMatcherNodeModel, method execute.
/**
 * {@inheritDoc}
 *
 * <p>
 * Builds the sorted subset matchers from the first input table and then enqueues, for every row of the second
 * input table that holds a non-empty collection cell, one matching job in a sub pool of the global thread pool.
 * Rows whose set cell is not a collection or is empty only advance the progress and are counted as skipped.
 * The result container is closed once all enqueued jobs have terminated.
 * </p>
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // first input: table with the subsets
    final BufferedDataTable subsetTable = inData[0];
    final DataTableSpec subsetSpec = subsetTable.getSpec();
    final int subsetColIdx = subsetSpec.findColumnIndex(m_subsetCol.getStringValue());
    // one comparator is shared for sorting the subsets AND the set lists
    final Comparator<DataCell> comparator = subsetSpec.getColumnSpec(subsetColIdx).getType().getComparator();

    // second input: table with the sets
    final BufferedDataTable setTable = inData[1];
    final DataTableSpec setSpec = setTable.getSpec();
    final boolean idFromRowKey = m_setIDCol.useRowID();
    // a negative index marks "use the row key as set id"
    final int setIDColIdx = idFromRowKey ? -1 : setSpec.findColumnIndex(m_setIDCol.getStringValue());
    final DataColumnSpec setIDSpec = idFromRowKey ? null : setSpec.getColumnSpec(setIDColIdx);
    final int transColIdx = setSpec.findColumnIndex(m_setCol.getStringValue());
    final boolean appendSetCol = m_appendSetListCol.getBooleanValue();

    // the container that collects the result rows
    final DataTableSpec outSpec = createTableSpec(setIDSpec, setSpec.getColumnSpec(transColIdx),
        subsetSpec.getColumnSpec(subsetColIdx), appendSetCol);
    m_dc = exec.createDataContainer(outSpec);

    // nothing to match if either input is empty
    final long subsetRowCount = subsetTable.size();
    if (subsetRowCount == 0) {
        setWarningMessage("Empty subset table found");
        m_dc.close();
        return new BufferedDataTable[] {m_dc.getTable()};
    }
    final long setRowCount = setTable.size();
    if (setRowCount == 0) {
        setWarningMessage("Empty set table found");
        m_dc.close();
        return new BufferedDataTable[] {m_dc.getTable()};
    }

    // split the progress between building the matchers and processing the sets
    final double totalRowCount = subsetRowCount + setRowCount * SET_PROCESSING_FACTOR;
    final ExecutionMonitor subsetExec = exec.createSubProgress(subsetRowCount / totalRowCount);
    exec.setMessage("Generating subset base...");
    final SubsetMatcher[] sortedMatcher = createSortedMatcher(subsetExec, subsetTable, subsetColIdx, comparator);
    subsetExec.setProgress(1.0);
    if (sortedMatcher.length < 1) {
        setWarningMessage("No item sets found");
        m_dc.close();
        return new BufferedDataTable[] {m_dc.getTable()};
    }

    final ExecutionMonitor setExec = exec.createSubProgress((setRowCount * SET_PROCESSING_FACTOR) / totalRowCount);
    exec.setMessage("Processing sets... ");
    // sub pool of the global thread pool that runs the per-set matching jobs
    final ThreadPool pool = KNIMEConstants.GLOBAL_THREAD_POOL.createSubPool(1);
    for (final DataRow row : setTable) {
        exec.checkCanceled();
        final DataCell setIDCell =
            setIDColIdx < 0 ? new StringCell(row.getKey().getString()) : row.getCell(setIDColIdx);
        final DataCell setCell = row.getCell(transColIdx);
        final boolean matchable =
            setCell instanceof CollectionDataValue && ((CollectionDataValue) setCell).size() >= 1;
        if (!matchable) {
            // neither a collection nor a non-empty one: report progress and count the row as skipped
            setExec.setProgress(m_setCounter.incrementAndGet() / (double) setRowCount);
            m_skipCounter.incrementAndGet();
            continue;
        }
        // one job per set
        pool.enqueue(createRunnable(setExec, setRowCount, setIDCell, (CollectionDataValue) setCell, appendSetCol,
            comparator, sortedMatcher, m_maxMismatches.getIntValue()));
    }

    // all jobs must be done before the container may be closed
    pool.waitForTermination();
    exec.setMessage("Creating data table...");
    m_dc.close();
    if (m_skipCounter.intValue() > 0) {
        setWarningMessage("No matching subsets found for " + m_skipCounter + " out of " + setRowCount + " sets");
    }
    exec.setProgress(1.0);
    return new BufferedDataTable[] {m_dc.getTable()};
}
Use of org.knime.core.util.ThreadPool in the project knime-core by knime.
Class SubNodeContainer, method fetchInputDataFromParent.
/* -------------------- Virtual node callbacks -------------- */
/**
 * Called from the virtual input node when it is executed - possibly executes nodes in the parent workflow manager
 * and then fetches the input data from it.
 *
 * <p>
 * If the calling thread belongs to a {@link ThreadPool}, the work is run via
 * {@link ThreadPool#runInvisible(Callable)}; otherwise it is run directly in the calling thread. In the direct
 * case any exception is wrapped in an {@link ExecutionException}; if that exception is an
 * {@link InterruptedException}, the interrupt status of the calling thread is restored before wrapping so that
 * callers can still observe the interruption.
 * </p>
 *
 * @return the subnode data input (incl. mandatory flow var port object), or <code>null</code> if this node is not
 *         contained in the parent or the parent could not assemble the input data.
 * @throws ExecutionException any exception thrown while waiting for upstream nodes to finish execution.
 */
public PortObject[] fetchInputDataFromParent() throws ExecutionException {
    Callable<PortObject[]> c = new Callable<PortObject[]>() {
        @Override
        public PortObject[] call() throws Exception {
            final WorkflowManager parent = getParent();
            // might be not yet or no longer in workflow (e.g. part of construction)
            if (!parent.containsNodeContainer(getID())) {
                return null;
            }
            PortObject[] results = new PortObject[getNrInPorts()];
            parent.executePredecessorsAndWait(getID());
            return parent.assembleInputData(getID(), results) ? results : null;
        }
    };
    ThreadPool currentPool = ThreadPool.currentPool();
    if (currentPool != null) {
        return currentPool.runInvisible(c);
    }
    // not running inside a pool thread: execute directly in the calling thread
    try {
        return c.call();
    } catch (InterruptedException e) {
        // wrapping would otherwise lose the interruption: restore the flag for the caller
        Thread.currentThread().interrupt();
        throw new ExecutionException(e);
    } catch (Exception e) {
        throw new ExecutionException(e);
    }
}
Use of org.knime.core.util.ThreadPool in the project knime-core by knime.
Class TreeEnsembleLearner, method learnEnsemble.
/**
 * Learns all trees of the ensemble by enqueuing one {@link TreeLearnerCallable} per tree in the global thread
 * pool and assembles the resulting {@link TreeEnsembleModel}.
 *
 * <p>
 * A semaphore with <code>3 * availableProcessors / 2</code> permits is acquired once before each tree is
 * enqueued and is handed to the learner callable (presumably released there when a tree is done - confirm in
 * <code>TreeLearnerCallable</code>). The first throwable recorded in the shared reference is rethrown via
 * <code>checkThrowable</code>.
 * </p>
 *
 * @param exec monitor used for progress reporting and for creating the per-tree sub progress
 * @return the learned ensemble model, which is also stored in <code>m_ensembleModel</code>
 * @throws CanceledExecutionException declared by this method; not thrown directly by the code visible here
 * @throws ExecutionException declared by this method; may originate from <code>runInvisible</code>
 */
public TreeEnsembleModel learnEnsemble(final ExecutionMonitor exec) throws CanceledExecutionException, ExecutionException {
    final int treeCount = m_config.getNrModels();
    final RandomData seedSource = m_config.createRandomData();
    final ThreadPool pool = KNIMEConstants.GLOBAL_THREAD_POOL;
    final AtomicReference<Throwable> firstFailure = new AtomicReference<Throwable>();
    @SuppressWarnings("unchecked")
    final Future<TreeLearnerResult>[] pendingTrees = new Future[treeCount];
    final int maxParallel = 3 * Runtime.getRuntime().availableProcessors() / 2;
    final Semaphore slots = new Semaphore(maxParallel);

    Callable<TreeLearnerResult[]> learnCallable = new Callable<TreeLearnerResult[]>() {
        @Override
        public TreeLearnerResult[] call() throws Exception {
            final TreeLearnerResult[] learned = new TreeLearnerResult[treeCount];
            // enqueue one learner per tree; every enqueue first needs a free slot
            for (int tree = 0; tree < treeCount; tree++) {
                slots.acquire();
                reportFinished(tree - maxParallel, exec);
                checkThrowable(firstFailure);
                RandomData treeRandom = TreeEnsembleLearnerConfiguration
                    .createRandomData(seedSource.nextLong(Long.MIN_VALUE, Long.MAX_VALUE));
                ExecutionMonitor treeExec = exec.createSubProgress(0.0);
                pendingTrees[tree] = pool.enqueue(new TreeLearnerCallable(treeExec, treeRandom, firstFailure, slots));
            }
            // take every slot once more, i.e. wait until all slots have been handed back
            for (int slot = 0; slot < maxParallel; slot++) {
                slots.acquire();
                reportFinished(treeCount - 1 + slot - maxParallel, exec);
            }
            // collect the results; only the first failure is remembered
            for (int tree = 0; tree < treeCount; tree++) {
                try {
                    learned[tree] = pendingTrees[tree].get();
                } catch (Exception e) {
                    firstFailure.compareAndSet(null, e);
                }
            }
            return learned;
        }

        /** Updates the progress for positive tree indices only. */
        private void reportFinished(final int treeIndex, final ExecutionMonitor progMon) {
            if (treeIndex <= 0) {
                return;
            }
            progMon.setProgress(treeIndex / (double) treeCount, "Tree " + treeIndex + "/" + treeCount);
        }
    };

    TreeLearnerResult[] modelResults = pool.runInvisible(learnCallable);
    checkThrowable(firstFailure);

    // unpack the per-tree results into the ensemble and the sampling bookkeeping fields
    AbstractTreeModel[] models = new AbstractTreeModel[treeCount];
    m_rowSamples = new RowSample[treeCount];
    m_columnSampleStrategies = new ColumnSampleStrategy[treeCount];
    for (int tree = 0; tree < treeCount; tree++) {
        final TreeLearnerResult result = modelResults[tree];
        models[tree] = result.m_treeModel;
        m_rowSamples[tree] = result.m_rowSample;
        m_columnSampleStrategies[tree] = result.m_rootColumnSampleStrategy;
    }
    m_ensembleModel = new TreeEnsembleModel(m_config, m_data.getMetaData(), models, m_data.getTreeType());
    return m_ensembleModel;
}
Use of org.knime.core.util.ThreadPool in the project knime-core by knime.
Class Proximity, method calcProximities.
/**
 * Calculates the proximity matrix for one or two tables by enqueuing one calculation runnable per tree of the
 * ensemble in the global thread pool and finally normalizing the matrix with <code>1 / number of trees</code>.
 *
 * <p>
 * With a single table of at most 65500 rows a {@link SingleTableProximityMatrix} is used; a larger single table
 * falls back to a {@link TwoTablesProximityMatrix} of the table with itself. With two tables a
 * {@link TwoTablesProximityMatrix} of both tables is used.
 * </p>
 *
 * @param tables one or two input tables
 * @param modelPortObject provides the ensemble model and the spec used to calculate the filter indices
 * @param exec used for progress reporting and for creating the per-tree sub progress
 * @return the normalized proximity matrix
 * @throws InvalidSettingsException declared by this method; presumably raised while calculating the filter
 *             indices - confirm against <code>calculateFilterIndices</code>
 * @throws InterruptedException if interrupted while acquiring a semaphore permit
 * @throws ExecutionException declared by this method
 * @throws CanceledExecutionException declared by this method
 * @throws IllegalArgumentException if <code>tables</code> holds neither one nor two tables
 */
public static ProximityMatrix calcProximities(final BufferedDataTable[] tables, final TreeEnsembleModelPortObject modelPortObject, final ExecutionContext exec) throws InvalidSettingsException, InterruptedException, ExecutionException, CanceledExecutionException {
    final int tableCount = tables.length;
    if (tableCount != 1 && tableCount != 2) {
        throw new IllegalArgumentException("Currently only up to two tables are supported.");
    }
    final boolean optionalTable = tableCount == 2;

    final ProximityMatrix proximityMatrix;
    if (optionalTable) {
        proximityMatrix = new TwoTablesProximityMatrix(tables[0], tables[1]);
    } else if (tables[0].size() <= 65500) {
        proximityMatrix = new SingleTableProximityMatrix(tables[0]);
    } else {
        // this is unfortunate and we should maybe think of a different solution
        proximityMatrix = new TwoTablesProximityMatrix(tables[0], tables[0]);
    }

    final TreeEnsembleModelPortObjectSpec modelSpec = modelPortObject.getSpec();
    final TreeEnsembleModel ensembleModel = modelPortObject.getEnsembleModel();

    // filter indices of the learn columns, one entry per input table
    final int[] firstTableIndices = modelSpec.calculateFilterIndices(tables[0].getDataTableSpec());
    final int[][] learnColIndicesInTables = optionalTable
        ? new int[][] {firstTableIndices, modelSpec.calculateFilterIndices(tables[1].getDataTableSpec())}
        : new int[][] {firstTableIndices};

    final ThreadPool tp = KNIMEConstants.GLOBAL_THREAD_POOL;
    final int procCount = 3 * Runtime.getRuntime().availableProcessors() / 2;
    final Semaphore semaphore = new Semaphore(procCount);
    final AtomicReference<Throwable> proxThrowableRef = new AtomicReference<Throwable>();
    final int nrTrees = ensembleModel.getNrModels();
    final Future<?>[] calcFutures = new Future<?>[nrTrees];

    exec.setProgress(0, "Starting proximity calculation per tree.");
    // enqueue one calculation per tree; a permit is acquired before each enqueue
    for (int tree = 0; tree < nrTrees; tree++) {
        semaphore.acquire();
        finishedTree(tree, exec, nrTrees);
        checkThrowable(proxThrowableRef);
        AbstractTreeModel treeModel = ensembleModel.getTreeModel(tree);
        ExecutionMonitor subExec = exec.createSubProgress(0.0);
        if (optionalTable) {
            calcFutures[tree] = tp.enqueue(new TwoTablesProximityCalcRunnable(proximityMatrix, tables,
                learnColIndicesInTables, treeModel, modelPortObject, semaphore, proxThrowableRef, subExec));
        } else {
            calcFutures[tree] = tp.enqueue(new SingleTableProximityCalcRunnable(proximityMatrix, tables,
                learnColIndicesInTables, treeModel, modelPortObject, semaphore, proxThrowableRef, subExec));
        }
    }
    // acquire every permit once more before collecting the futures
    for (int permit = 0; permit < procCount; permit++) {
        semaphore.acquire();
        finishedTree(nrTrees - procCount + permit, exec, nrTrees);
    }
    // only the first failure is remembered and rethrown below
    for (int tree = 0; tree < calcFutures.length; tree++) {
        try {
            calcFutures[tree].get();
        } catch (Exception e) {
            proxThrowableRef.compareAndSet(null, e);
        }
    }
    checkThrowable(proxThrowableRef);
    proximityMatrix.normalize(1.0 / nrTrees);
    return proximityMatrix;
}
Use of org.knime.core.util.ThreadPool in the project knime-core by knime.
Class TreeEnsembleLearner, method learnEnsemble.
/**
 * Learns all trees of the ensemble by enqueuing one {@link TreeLearnerCallable} per tree in the global thread
 * pool and assembles the resulting {@link TreeEnsembleModel}.
 *
 * <p>
 * A semaphore with <code>3 * availableProcessors / 2</code> permits is acquired once before each tree is
 * enqueued and is handed to the learner callable (presumably released there when a tree is done - confirm in
 * <code>TreeLearnerCallable</code>). The first throwable recorded in the shared reference is rethrown via
 * <code>checkThrowable</code>.
 * </p>
 *
 * @param exec monitor used for progress reporting and for creating the per-tree sub progress
 * @return the learned ensemble model, which is also stored in <code>m_ensembleModel</code>
 * @throws CanceledExecutionException declared by this method; not thrown directly by the code visible here
 * @throws ExecutionException declared by this method; may originate from <code>runInvisible</code>
 */
public TreeEnsembleModel learnEnsemble(final ExecutionMonitor exec) throws CanceledExecutionException, ExecutionException {
    final int treeCount = m_config.getNrModels();
    final RandomData seedSource = m_config.createRandomData();
    final ThreadPool pool = KNIMEConstants.GLOBAL_THREAD_POOL;
    final AtomicReference<Throwable> firstFailure = new AtomicReference<Throwable>();
    @SuppressWarnings("unchecked")
    final Future<TreeLearnerResult>[] pendingTrees = new Future[treeCount];
    final int maxParallel = 3 * Runtime.getRuntime().availableProcessors() / 2;
    final Semaphore slots = new Semaphore(maxParallel);

    Callable<TreeLearnerResult[]> learnCallable = new Callable<TreeLearnerResult[]>() {
        @Override
        public TreeLearnerResult[] call() throws Exception {
            final TreeLearnerResult[] learned = new TreeLearnerResult[treeCount];
            // enqueue one learner per tree; every enqueue first needs a free slot
            for (int tree = 0; tree < treeCount; tree++) {
                slots.acquire();
                reportFinished(tree - maxParallel, exec);
                checkThrowable(firstFailure);
                RandomData treeRandom = TreeEnsembleLearnerConfiguration
                    .createRandomData(seedSource.nextLong(Long.MIN_VALUE, Long.MAX_VALUE));
                ExecutionMonitor treeExec = exec.createSubProgress(0.0);
                pendingTrees[tree] = pool.enqueue(new TreeLearnerCallable(treeExec, treeRandom, firstFailure, slots));
            }
            // take every slot once more, i.e. wait until all slots have been handed back
            for (int slot = 0; slot < maxParallel; slot++) {
                slots.acquire();
                reportFinished(treeCount - 1 + slot - maxParallel, exec);
            }
            // collect the results; only the first failure is remembered
            for (int tree = 0; tree < treeCount; tree++) {
                try {
                    learned[tree] = pendingTrees[tree].get();
                } catch (Exception e) {
                    firstFailure.compareAndSet(null, e);
                }
            }
            return learned;
        }

        /** Updates the progress for positive tree indices only. */
        private void reportFinished(final int treeIndex, final ExecutionMonitor progMon) {
            if (treeIndex <= 0) {
                return;
            }
            progMon.setProgress(treeIndex / (double) treeCount, "Tree " + treeIndex + "/" + treeCount);
        }
    };

    TreeLearnerResult[] modelResults = pool.runInvisible(learnCallable);
    checkThrowable(firstFailure);

    // unpack the per-tree results into the ensemble and the sampling bookkeeping fields
    AbstractTreeModel[] models = new AbstractTreeModel[treeCount];
    m_rowSamples = new RowSample[treeCount];
    m_columnSampleStrategies = new ColumnSampleStrategy[treeCount];
    for (int tree = 0; tree < treeCount; tree++) {
        final TreeLearnerResult result = modelResults[tree];
        models[tree] = result.m_treeModel;
        m_rowSamples[tree] = result.m_rowSample;
        m_columnSampleStrategies[tree] = result.m_rootColumnSampleStrategy;
    }
    m_ensembleModel = new TreeEnsembleModel(m_config, m_data.getMetaData(), models, m_data.getTreeType());
    return m_ensembleModel;
}
Aggregations