Use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.
The class TreeEnsembleClassificationLearnerNodeModel, method saveInternals.
/**
* {@inheritDoc}
*/
@Override
protected void saveInternals(final File nodeInternDir, final ExecutionMonitor exec)
        throws IOException, CanceledExecutionException {
    File file;
    ExecutionMonitor sub;
    if (m_oldStyleEnsembleModel_deprecated != null) {
        // old workflow (<2.10) loaded and saved ...
        file = new File(nodeInternDir, INTERNAL_TREES_FILE);
        OutputStream out = new GZIPOutputStream(new FileOutputStream(file));
        sub = exec.createSubProgress(0.2);
        m_oldStyleEnsembleModel_deprecated.save(out, sub);
        out.close();
    }
    if (m_hiliteRowSample != null) {
        // persist the row sample used for hiliting in the views
        file = new File(nodeInternDir, INTERNAL_DATASAMPLE_FILE);
        sub = exec.createSubProgress(0.2);
        DataContainer.writeToZip(m_hiliteRowSample, file, sub);
    }
    if (m_viewMessage != null) {
        // persist the view warning message as XML settings
        file = new File(nodeInternDir, INTERNAL_INFO_FILE);
        NodeSettings sets = new NodeSettings("ensembleData");
        sets.addString("view_warning", m_viewMessage);
        sets.saveToXML(new FileOutputStream(file));
    }
}
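The pattern to note here is that each optional save step claims a fixed share of the parent monitor via createSubProgress(0.2), so partial saves still report sensible overall progress. Below is a minimal, self-contained sketch of that pattern; the class SubProgressSketch and its saveStep method are hypothetical, only the ExecutionMonitor calls come from the KNIME API.

import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionMonitor;

// Hypothetical helper: splits one monitor among sequential sub-tasks.
final class SubProgressSketch {

    static void saveAll(final ExecutionMonitor exec) throws CanceledExecutionException {
        // each child maps its own 0..1 range onto 20% of the parent's bar
        saveStep(exec.createSubProgress(0.2), "trees");
        saveStep(exec.createSubProgress(0.2), "data sample");
        exec.setProgress(1.0); // claim the remainder once everything is written
    }

    private static void saveStep(final ExecutionMonitor sub, final String what)
            throws CanceledExecutionException {
        for (int i = 1; i <= 10; i++) {
            sub.checkCanceled(); // honor user cancellation between chunks
            sub.setProgress(i / 10.0, "Saving " + what);
        }
    }
}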
Use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.
The class NormalizerNodeModel, method calculate.
/**
 * Creates a new normalized {@link org.knime.core.data.DataTable} depending
 * on the mode.
 *
 * @param inData The input data.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec)
        throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getSpec();
    // extract selected numeric columns
    updateNumericColumnSelection(inSpec);
    Normalizer ntable = new Normalizer(inTable, m_columns);
    long rowcount = inTable.size();
    ExecutionMonitor prepareExec = exec.createSubProgress(0.3);
    AffineTransTable outTable;
    boolean fixDomainBounds = false;
    switch (m_mode) {
        case NONORM_MODE:
            return new CalculationResult(inTable, new DataTableSpec(), new AffineTransConfiguration());
        case MINMAX_MODE:
            fixDomainBounds = true;
            outTable = ntable.doMinMaxNorm(m_max, m_min, prepareExec);
            break;
        case ZSCORE_MODE:
            outTable = ntable.doZScoreNorm(prepareExec);
            break;
        case DECIMALSCALING_MODE:
            outTable = ntable.doDecimalScaling(prepareExec);
            break;
        default:
            throw new Exception("No mode set");
    }
    if (outTable.getErrorMessage() != null) {
        // something went wrong, report and throw an exception
        throw new Exception(outTable.getErrorMessage());
    }
    if (ntable.getErrorMessage() != null) {
        // something went wrong during initialization, report.
        setWarningMessage(ntable.getErrorMessage());
    }
    DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inSpec, m_columns);
    AffineTransConfiguration configuration = outTable.getConfiguration();
    DataTableSpec spec = outTable.getDataTableSpec();
    // fix the domain to the configured min/max (the computed domain results from
    // the same transformation, which is not guaranteed to snap to min/max)
    if (fixDomainBounds) {
        DataColumnSpec[] newColSpecs = new DataColumnSpec[spec.getNumColumns()];
        for (int i = 0; i < newColSpecs.length; i++) {
            newColSpecs[i] = spec.getColumnSpec(i);
        }
        for (int i = 0; i < m_columns.length; i++) {
            int index = spec.findColumnIndex(m_columns[i]);
            DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
            DataColumnDomainCreator domCreator = new DataColumnDomainCreator(newColSpecs[index].getDomain());
            domCreator.setLowerBound(new DoubleCell(m_min));
            domCreator.setUpperBound(new DoubleCell(m_max));
            creator.setDomain(domCreator.createDomain());
            newColSpecs[index] = creator.createSpec();
        }
        spec = new DataTableSpec(spec.getName(), newColSpecs);
    }
    ExecutionMonitor normExec = exec.createSubProgress(0.7);
    BufferedDataContainer container = exec.createDataContainer(spec);
    long count = 1;
    for (DataRow row : outTable) {
        normExec.checkCanceled();
        normExec.setProgress(count / (double) rowcount,
            "Normalizing row no. " + count + " of " + rowcount + " (\"" + row.getKey() + "\")");
        container.addRowToTable(row);
        count++;
    }
    container.close();
    return new CalculationResult(container.getTable(), modelSpec, configuration);
}
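This method budgets 30% of the progress for the preparation pass (computing the normalization parameters) and 70% for writing the output rows, with a per-row message. A minimal sketch of that two-phase budgeting; TwoPhaseProgressSketch and processRows are hypothetical names, the ExecutionMonitor calls are the real API.

import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionMonitor;

// Hypothetical helper: two-phase progress budget (prepare 0.3, write 0.7).
final class TwoPhaseProgressSketch {

    static void processRows(final ExecutionMonitor exec, final long rowCount)
            throws CanceledExecutionException {
        ExecutionMonitor prepareExec = exec.createSubProgress(0.3);
        prepareExec.setProgress(1.0, "Computed normalization parameters"); // phase 1 done
        ExecutionMonitor normExec = exec.createSubProgress(0.7);
        for (long row = 1; row <= rowCount; row++) {
            normExec.checkCanceled();
            normExec.setProgress(row / (double) rowCount,
                "Normalizing row no. " + row + " of " + rowCount);
        }
    }
}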
Use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.
The class EnrichmentPlotterModel, method execute.
/**
* {@inheritDoc}
*/
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec)
        throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            if (c1.isMissing()) {
                continue;
            } else {
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
            }
            maxK++;
        }
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[DISCRATE_POINTS.length];
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                y += ((DoubleValue) h.b).getDoubleValue();
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            area += y / maxK;
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            if ((nextHitRatePoint < DISCRATE_POINTS.length)
                    && (k == (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(),
            xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
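Two ExecutionMonitor idioms stand out here: each of the N curves gets an equal 1/N share of the progress, and cancellation is polled only every 100 rows so the inner loop stays cheap. A minimal sketch of both, assuming a hypothetical PerPassProgressSketch class:

import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionMonitor;

// Hypothetical helper: equal progress share per pass, cancellation polled sparsely.
final class PerPassProgressSketch {

    static void generate(final ExecutionMonitor exec, final int passes, final int rows)
            throws CanceledExecutionException {
        for (int i = 0; i < passes; i++) {
            ExecutionMonitor sub = exec.createSubProgress(1.0 / passes);
            exec.setMessage("Generating curve " + (i + 1));
            for (int k = 0; k < rows; k++) {
                if (k % 100 == 0) { // poll only every 100 rows to keep the loop cheap
                    sub.checkCanceled();
                    sub.setProgress(k / (double) rows);
                }
            }
        }
    }
}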
Use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.
The class CollectionSplitNodeModel, method execute.
/**
* {@inheritDoc}
*/
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec)
        throws Exception {
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getDataTableSpec();
    // by default the column rearranger receives the entire progress
    ExecutionMonitor execForCR = exec;
    // validate settings
    getTargetColIndex(spec);
    DataColumnSpec[] colSpecs;
    switch (m_settings.getCountElementsPolicy()) {
        case Count:
            execForCR = exec.createSubProgress(0.7);
            ExecutionMonitor e = exec.createSubProgress(0.3);
            colSpecs = countNewColumns(table, e);
            break;
        case UseElementNamesOrFail:
            colSpecs = getColSpecsByElementNames(spec);
            break;
        case BestEffort:
            try {
                colSpecs = getColSpecsByElementNames(spec);
            } catch (InvalidSettingsException ise) {
                execForCR = exec.createSubProgress(0.7);
                e = exec.createSubProgress(0.3);
                colSpecs = countNewColumns(table, e);
            }
            break;
        default:
            throw new InvalidSettingsException("Unsupported policy: " + m_settings.getCountElementsPolicy());
    }
    Pair<ColumnRearranger, SplitCellFactory> pair = createColumnRearranger(spec, colSpecs);
    BufferedDataTable out = exec.createColumnRearrangeTable(table, pair.getFirst(), execForCR);
    String warnMessage = pair.getSecond().getWarnMessage();
    if (warnMessage != null) {
        setWarningMessage(warnMessage);
    }
    if (m_settings.isDetermineMostSpecificDataType()) {
        out = refineTypes(out, pair.getSecond(), exec);
    }
    return new BufferedDataTable[] { out };
}
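The budgeting here is conditional: if no extra counting scan is needed, the column rearranger gets the whole monitor; otherwise the budget is split 70/30 between the rearranger and the scan. A minimal sketch of that conditional allocation, with ConditionalProgressSketch and needsCountingPass as hypothetical stand-ins:

import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionMonitor;

// Hypothetical helper: the main task owns all progress unless an extra scan is needed.
final class ConditionalProgressSketch {

    static void run(final ExecutionMonitor exec, final boolean needsCountingPass)
            throws CanceledExecutionException {
        ExecutionMonitor execForMain = exec; // default: full budget for the main task
        if (needsCountingPass) {
            execForMain = exec.createSubProgress(0.7);               // main task: 70%
            ExecutionMonitor countSub = exec.createSubProgress(0.3); // extra scan: 30%
            countSub.setProgress(1.0, "Counted collection elements");
        }
        execForMain.setProgress(1.0, "Rearranged columns");
    }
}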
Use of org.knime.core.node.ExecutionMonitor in project knime-core by knime.
The class Learner, method perform.
/**
 * @param data The data table.
 * @param exec The execution context used for reporting progress.
 * @return An object which holds the results.
 * @throws CanceledExecutionException if the computation is canceled by the user
 * @throws InvalidSettingsException when settings are inconsistent with the data
 */
public LogisticRegressionContent perform(final BufferedDataTable data, final ExecutionContext exec)
        throws CanceledExecutionException, InvalidSettingsException {
    exec.checkCanceled();
    int iter = 0;
    boolean converged = false;
    final RegressionTrainingData trainingData = new RegressionTrainingData(data, m_outSpec, m_specialColumns,
        true, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories);
    int targetIndex = data.getDataTableSpec().findColumnIndex(m_outSpec.getTargetCols().get(0).getName());
    final int tcC = trainingData.getDomainValues().get(targetIndex).size();
    final int rC = trainingData.getRegressorCount();
    final RealMatrix beta = new Array2DRowRealMatrix(1, (tcC - 1) * (rC + 1));
    Double loglike = 0.0;
    Double loglikeOld = 0.0;
    exec.setMessage("Iterative optimization. Processing iteration 1.");
    // main loop
    while (iter < m_maxIter && !converged) {
        RealMatrix betaOld = beta.copy();
        loglikeOld = loglike;
        // Do heavy work in a separate thread, which allows interrupting it.
        // Note the queue may block if no more threads are available (e.g. thread count = 1);
        // as soon as we stall in 'get' this thread reduces the number of running threads.
        Future<Double> future = ThreadPool.currentPool().enqueue(new Callable<Double>() {
            @Override
            public Double call() throws Exception {
                final ExecutionMonitor progMon = exec.createSubProgress(1.0 / m_maxIter);
                irlsRls(trainingData, beta, rC, tcC, progMon);
                progMon.setProgress(1.0);
                return likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            }
        });
        try {
            loglike = future.get();
        } catch (InterruptedException e) {
            future.cancel(true);
            exec.checkCanceled();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            if (e.getCause() instanceof RuntimeException) {
                throw (RuntimeException) e.getCause();
            } else {
                throw new RuntimeException(e.getCause());
            }
        }
        if (Double.isInfinite(loglike) || Double.isNaN(loglike)) {
            throw new RuntimeException(FAILING_MSG);
        }
        exec.checkCanceled();
        // test for decreasing likelihood
        while ((Double.isInfinite(loglike) || Double.isNaN(loglike) || loglike < loglikeOld) && iter > 0) {
            converged = true;
            for (int k = 0; k < beta.getRowDimension(); k++) {
                if (abs(beta.getEntry(k, 0) - betaOld.getEntry(k, 0)) > m_eps * abs(betaOld.getEntry(k, 0))) {
                    converged = false;
                    break;
                }
            }
            if (converged) {
                break;
            }
            // halve the step size of beta
            beta.setSubMatrix((beta.add(betaOld)).scalarMultiply(0.5).getData(), 0, 0);
            exec.checkCanceled();
            loglike = likelihood(trainingData.iterator(), beta, rC, tcC, exec);
            exec.checkCanceled();
        }
        // test for convergence
        converged = true;
        for (int k = 0; k < beta.getRowDimension(); k++) {
            if (abs(beta.getEntry(k, 0) - betaOld.getEntry(k, 0)) > m_eps * abs(betaOld.getEntry(k, 0))) {
                converged = false;
                break;
            }
        }
        iter++;
        LOGGER.debug("#Iterations: " + iter);
        LOGGER.debug("Log Likelihood: " + loglike);
        StringBuilder betaBuilder = new StringBuilder();
        for (int i = 0; i < beta.getRowDimension() - 1; i++) {
            betaBuilder.append(Double.toString(beta.getEntry(i, 0)));
            betaBuilder.append(", ");
        }
        if (beta.getRowDimension() > 0) {
            betaBuilder.append(Double.toString(beta.getEntry(beta.getRowDimension() - 1, 0)));
        }
        LOGGER.debug("beta: " + betaBuilder.toString());
        exec.checkCanceled();
        exec.setMessage("Iterative optimization. #Iterations: " + iter + " | Log-likelihood: "
            + DoubleFormat.formatDouble(loglike) + ". Processing iteration " + (iter + 1) + ".");
    }
    // The covariance matrix (A is a working matrix populated by irlsRls;
    // it is a field of this class, not shown in this excerpt)
    RealMatrix covMat = new QRDecomposition(A).getSolver().getInverse().scalarMultiply(-1);
    List<String> factorList = new ArrayList<String>();
    List<String> covariateList = new ArrayList<String>();
    Map<String, List<DataCell>> factorDomainValues = new HashMap<String, List<DataCell>>();
    for (int i : trainingData.getActiveCols()) {
        DataColumnSpec columnSpec = data.getDataTableSpec().getColumnSpec(i);
        if (trainingData.getIsNominal().get(i)) {
            String factor = columnSpec.getName();
            factorList.add(factor);
            List<DataCell> values = trainingData.getDomainValues().get(i);
            factorDomainValues.put(factor, values);
        } else {
            if (columnSpec.getType().isCompatible(BitVectorValue.class)
                    || columnSpec.getType().isCompatible(ByteVectorValue.class)) {
                int length = trainingData.getVectorLengths().getOrDefault(i, 0).intValue();
                for (int j = 0; j < length; ++j) {
                    covariateList.add(columnSpec.getName() + "[" + j + "]");
                }
            } else {
                covariateList.add(columnSpec.getName());
            }
        }
    }
    final Map<? extends Integer, Integer> vectorIndexLengths = trainingData.getVectorLengths();
    final Map<String, Integer> vectorLengths = new LinkedHashMap<String, Integer>();
    for (DataColumnSpec spec : m_specialColumns) {
        int colIndex = data.getSpec().findColumnIndex(spec.getName());
        if (colIndex >= 0) {
            vectorLengths.put(spec.getName(), vectorIndexLengths.get(colIndex));
        }
    }
    // create content
    LogisticRegressionContent content = new LogisticRegressionContent(m_outSpec, factorList, covariateList,
        vectorLengths, m_targetReferenceCategory, m_sortTargetCategories, m_sortFactorsCategories,
        beta, loglike, covMat, iter);
    return content;
}
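The ExecutionMonitor pattern here is cancellation across threads: each IRLS iteration runs in KNIME's ThreadPool with its own 1/m_maxIter sub-monitor, and an InterruptedException in 'get' is translated into a CanceledExecutionException by calling checkCanceled() after cancelling the worker. A minimal sketch of the same pattern, using a plain java.util.concurrent.ExecutorService instead of KNIME's ThreadPool for simplicity; CancelableIterationSketch and the placeholder worker body are hypothetical.

import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionMonitor;

// Hypothetical sketch: heavy work off-thread, user cancellation surfaced via checkCanceled().
final class CancelableIterationSketch {

    static double runIteration(final ExecutionMonitor exec, final ExecutorService pool,
            final int maxIter) throws CanceledExecutionException {
        final ExecutionMonitor iterSub = exec.createSubProgress(1.0 / maxIter);
        Future<Double> future = pool.submit(() -> {
            iterSub.setProgress(1.0); // stand-in for the real per-iteration work
            return 0.0;               // stand-in for the likelihood value
        });
        try {
            return future.get();
        } catch (InterruptedException e) {
            future.cancel(true);
            exec.checkCanceled(); // rethrows as CanceledExecutionException if the user canceled
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            throw new RuntimeException(e.getCause());
        }
    }

    public static void main(final String[] args) throws Exception {
        ExecutorService pool = Executors.newSingleThreadExecutor();
        try {
            runIteration(new ExecutionMonitor(), pool, 10);
        } finally {
            pool.shutdown();
        }
    }
}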