use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class NormalizerNodeModel method calculate.
/**
* New normalized {@link org.knime.core.data.DataTable} is created depending
* on the mode.
*/
/**
* @param inData The input data.
* @param exec For BufferedDataTable creation and progress.
* @return the result of the calculation
* @throws Exception If the node calculation fails for any reason.
*/
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
BufferedDataTable inTable = (BufferedDataTable) inData[0];
DataTableSpec inSpec = inTable.getSpec();
// extract selected numeric columns
updateNumericColumnSelection(inSpec);
Normalizer ntable = new Normalizer(inTable, m_columns);
long rowcount = inTable.size();
ExecutionMonitor prepareExec = exec.createSubProgress(0.3);
AffineTransTable outTable;
boolean fixDomainBounds = false;
switch(m_mode) {
case NONORM_MODE:
return new CalculationResult(inTable, new DataTableSpec(), new AffineTransConfiguration());
case MINMAX_MODE:
fixDomainBounds = true;
outTable = ntable.doMinMaxNorm(m_max, m_min, prepareExec);
break;
case ZSCORE_MODE:
outTable = ntable.doZScoreNorm(prepareExec);
break;
case DECIMALSCALING_MODE:
outTable = ntable.doDecimalScaling(prepareExec);
break;
default:
throw new Exception("No mode set");
}
if (outTable.getErrorMessage() != null) {
// something went wrong, report and throw an exception
throw new Exception(outTable.getErrorMessage());
}
if (ntable.getErrorMessage() != null) {
// something went wrong during initialization, report.
setWarningMessage(ntable.getErrorMessage());
}
DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inSpec, m_columns);
AffineTransConfiguration configuration = outTable.getConfiguration();
DataTableSpec spec = outTable.getDataTableSpec();
// the same transformation, which is not guaranteed to snap to min/max)
if (fixDomainBounds) {
DataColumnSpec[] newColSpecs = new DataColumnSpec[spec.getNumColumns()];
for (int i = 0; i < newColSpecs.length; i++) {
newColSpecs[i] = spec.getColumnSpec(i);
}
for (int i = 0; i < m_columns.length; i++) {
int index = spec.findColumnIndex(m_columns[i]);
DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
DataColumnDomainCreator domCreator = new DataColumnDomainCreator(newColSpecs[index].getDomain());
domCreator.setLowerBound(new DoubleCell(m_min));
domCreator.setUpperBound(new DoubleCell(m_max));
creator.setDomain(domCreator.createDomain());
newColSpecs[index] = creator.createSpec();
}
spec = new DataTableSpec(spec.getName(), newColSpecs);
}
ExecutionMonitor normExec = exec.createSubProgress(.7);
BufferedDataContainer container = exec.createDataContainer(spec);
long count = 1;
for (DataRow row : outTable) {
normExec.checkCanceled();
normExec.setProgress(count / (double) rowcount, "Normalizing row no. " + count + " of " + rowcount + " (\"" + row.getKey() + "\")");
container.addRowToTable(row);
count++;
}
container.close();
return new CalculationResult(container.getTable(), modelSpec, configuration);
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class EnrichmentPlotterModel method execute.
/**
* {@inheritDoc}
*/
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
final double rowCount = inData[0].size();
final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
for (int i = 0; i < m_settings.getCurveCount(); i++) {
final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
exec.setMessage("Generating curve " + (i + 1));
final Curve c = m_settings.getCurve(i);
final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
int k = 0, maxK = 0;
for (DataRow row : inData[0]) {
DataCell c1 = row.getCell(sortIndex);
DataCell c2 = row.getCell(actIndex);
if (k++ % 100 == 0) {
sexec.checkCanceled();
sexec.setProgress(k / rowCount);
}
if (c1.isMissing()) {
continue;
} else {
curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
}
maxK++;
}
Arrays.sort(curve, 0, maxK);
if (c.isSortDescending()) {
for (int j = 0; j < maxK / 2; j++) {
Helper h = curve[j];
curve[j] = curve[maxK - j - 1];
curve[maxK - j - 1] = h;
}
}
// this is for down-sampling so that the view is faster;
// plotting >100,000 points takes quite a long time
final int size = Math.min(MAX_RESOLUTION, maxK);
final double downSampleRate = maxK / (double) size;
final double[] xValues = new double[size + 1];
final double[] yValues = new double[size + 1];
xValues[0] = 0;
yValues[0] = 0;
int lastK = 0;
double y = 0, area = 0;
int nextHitRatePoint = 0;
final double[] hitRateValues = new double[DISCRATE_POINTS.length];
final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
for (k = 1; k <= maxK; k++) {
final Helper h = curve[k - 1];
if (m_settings.plotMode() == PlotMode.PlotSum) {
y += ((DoubleValue) h.b).getDoubleValue();
} else if (m_settings.plotMode() == PlotMode.PlotHits) {
if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
y++;
}
} else if (!h.b.isMissing()) {
MutableInteger count = clusters.get(h.b);
if (count == null) {
count = new MutableInteger(0);
clusters.put(h.b, count);
}
if (count.inc() == m_settings.minClusterMembers()) {
y++;
}
}
area += y / maxK;
if ((int) (k / downSampleRate) >= lastK + 1) {
lastK++;
xValues[lastK] = k;
yValues[lastK] = y;
}
if ((nextHitRatePoint < DISCRATE_POINTS.length) && (k == (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
hitRateValues[nextHitRatePoint] = y;
nextHitRatePoint++;
}
}
xValues[xValues.length - 1] = maxK;
yValues[yValues.length - 1] = y;
area /= y;
m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
for (int j = 0; j < hitRateValues.length; j++) {
hitRateValues[j] /= y;
}
discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
}
areaOutCont.close();
discrateOutCont.close();
return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class LogisticRegressionContent method createTablePortObject.
/**
* Creates a BufferedDataTable with the
* @param exec The execution context
* @return a port object
*/
public BufferedDataTable createTablePortObject(final ExecutionContext exec) {
DataTableSpec tableOutSpec = new DataTableSpec("Coefficients and Statistics", new String[] { "Logit", "Variable", "Coeff.", "Std. Err.", "z-score", "P>|z|" }, new DataType[] { StringCell.TYPE, StringCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE });
BufferedDataContainer dc = exec.createDataContainer(tableOutSpec);
List<DataCell> logits = this.getLogits();
List<String> parameters = this.getParameters();
int c = 0;
for (DataCell logit : logits) {
Map<String, Double> coefficients = this.getCoefficients(logit);
Map<String, Double> stdErrs = this.getStandardErrors(logit);
Map<String, Double> zScores = this.getZScores(logit);
Map<String, Double> pValues = this.getPValues(logit);
for (String parameter : parameters) {
List<DataCell> cells = new ArrayList<DataCell>();
cells.add(new StringCell(logit.toString()));
cells.add(new StringCell(parameter));
cells.add(new DoubleCell(coefficients.get(parameter)));
cells.add(new DoubleCell(stdErrs.get(parameter)));
cells.add(new DoubleCell(zScores.get(parameter)));
cells.add(new DoubleCell(pValues.get(parameter)));
c++;
dc.addRowToTable(new DefaultRow("Row" + c, cells));
}
List<DataCell> cells = new ArrayList<DataCell>();
cells.add(new StringCell(logit.toString()));
cells.add(new StringCell("Constant"));
cells.add(new DoubleCell(this.getIntercept(logit)));
cells.add(new DoubleCell(this.getInterceptStdErr(logit)));
cells.add(new DoubleCell(this.getInterceptZScore(logit)));
cells.add(new DoubleCell(this.getInterceptPValue(logit)));
c++;
dc.addRowToTable(new DefaultRow("Row" + c, cells));
}
dc.close();
return dc.getTable();
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class LogisticRegressionContent method createTablePortObject.
/**
* Creates a BufferedDataTable with the
* @param exec The execution context
* @return a port object
*/
public BufferedDataTable createTablePortObject(final ExecutionContext exec) {
DataTableSpec tableOutSpec = new DataTableSpec("Coefficients and Statistics", new String[] { "Logit", "Variable", "Coeff.", "Std. Err.", "z-score", "P>|z|" }, new DataType[] { StringCell.TYPE, StringCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE, DoubleCell.TYPE });
BufferedDataContainer dc = exec.createDataContainer(tableOutSpec);
List<DataCell> logits = this.getLogits();
List<String> parameters = this.getParameters();
int c = 0;
for (DataCell logit : logits) {
Map<String, Double> coefficients = this.getCoefficients(logit);
Map<String, Double> stdErrs = this.getStandardErrors(logit);
Map<String, Double> zScores = this.getZScores(logit);
Map<String, Double> pValues = this.getPValues(logit);
for (String parameter : parameters) {
List<DataCell> cells = new ArrayList<DataCell>();
cells.add(new StringCell(logit.toString()));
cells.add(new StringCell(parameter));
cells.add(new DoubleCell(coefficients.get(parameter)));
cells.add(new DoubleCell(stdErrs.get(parameter)));
cells.add(new DoubleCell(zScores.get(parameter)));
cells.add(new DoubleCell(pValues.get(parameter)));
c++;
dc.addRowToTable(new DefaultRow("Row" + c, cells));
}
List<DataCell> cells = new ArrayList<DataCell>();
cells.add(new StringCell(logit.toString()));
cells.add(new StringCell("Constant"));
cells.add(new DoubleCell(this.getIntercept(logit)));
cells.add(new DoubleCell(this.getInterceptStdErr(logit)));
cells.add(new DoubleCell(this.getInterceptZScore(logit)));
cells.add(new DoubleCell(this.getInterceptPValue(logit)));
c++;
dc.addRowToTable(new DefaultRow("Row" + c, cells));
}
dc.close();
return dc.getTable();
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class MissingValueHandling3Table method createMissingValueHandlingTable.
/**
* Does missing value handling to the argument table given the col settings in an array and also reports progress.
*
* @param table the table to do missing value handling on
* @param colSettings the settings
* @param exec for progress/cancel and to create the buffered data table
* @param warningBuffer To which potential warning messages are added.
* @return a cache table, cleaned up
* @throws CanceledExecutionException if canceled
* @since 2.10
*/
public static BufferedDataTable createMissingValueHandlingTable(final BufferedDataTable table, final MissingValueHandling2ColSetting[] colSettings, final ExecutionContext exec, final StringBuilder warningBuffer) throws CanceledExecutionException {
MissingValueHandling2ColSetting[] colSetting;
try {
colSetting = getColSetting(table.getDataTableSpec(), colSettings, false, warningBuffer);
} catch (InvalidSettingsException ise) {
LOGGER.coding("getColSetting method is not supposed to throw an exception, ignoring settings", ise);
DataTableSpec s = table.getDataTableSpec();
colSetting = new MissingValueHandling2ColSetting[s.getNumColumns()];
for (int i = 0; i < s.getNumColumns(); i++) {
colSetting[i] = new MissingValueHandling2ColSetting(s.getColumnSpec(i));
colSetting[i].setMethod(MissingValueHandling2ColSetting.METHOD_NO_HANDLING);
}
}
boolean needStatistics = false;
final Set<Integer> mostFrequentColumns = new HashSet<Integer>();
for (int i = 0; i < colSetting.length; i++) {
MissingValueHandling2ColSetting c = colSetting[i];
switch(c.getMethod()) {
case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
mostFrequentColumns.add(i);
case MissingValueHandling2ColSetting.METHOD_MAX:
case MissingValueHandling2ColSetting.METHOD_MIN:
case MissingValueHandling2ColSetting.METHOD_MEAN:
needStatistics = true;
break;
default:
}
}
MyStatisticsTable myT;
ExecutionMonitor e;
if (needStatistics) {
// for creating statistics table
ExecutionContext subExec = exec.createSubExecutionContext(0.5);
myT = new MyStatisticsTable(table, subExec) {
// do not try to get this Iterable in the constructor, it will not work, as long as
// Statistics3Table does the statistical computation in the constructor.
@Override
protected Iterable<Integer> getMostFrequentColumns() {
return mostFrequentColumns;
}
};
if (myT.m_warningMessage != null) {
if (warningBuffer.length() > 0) {
warningBuffer.append('\n');
}
warningBuffer.append(myT.m_warningMessage);
}
// for the iterator
e = exec.createSubProgress(0.5);
} else {
myT = null;
e = exec;
}
MissingValueHandling3Table mvht = new MissingValueHandling3Table(table, myT, colSetting);
BufferedDataContainer container = exec.createDataContainer(mvht.getDataTableSpec());
e.setMessage("Adding rows...");
int count = 0;
try {
MissingValueHandling3TableIterator it = new MissingValueHandling3TableIterator(mvht, e);
while (it.hasNext()) {
DataRow next;
next = it.next();
e.setMessage("Adding row " + (count + 1) + " (\"" + next.getKey() + "\")");
container.addRowToTable(next);
count++;
}
} catch (MissingValueHandling3TableIterator.RuntimeCanceledExecutionException rcee) {
throw rcee.getCause();
} finally {
container.close();
}
return container.getTable();
}
Aggregations