use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class ConditionalBoxPlotNodeModel method createOutputTable.
/**
 * Builds the closed container holding the box plot statistics, one row per statistic
 * (minimum, whiskers, quartiles, median, maximum) and one column per output column.
 *
 * @param tableSpec the spec that is passed on to {@code createOutputSpec} to derive the output spec
 * @param colSpecs the column specs used as keys into the collected statistics; indexed by output column
 * @param exec the execution context used to create the container
 * @return the already closed container with the statistics rows
 */
private BufferedDataContainer createOutputTable(final DataTableSpec tableSpec, final DataColumnSpec[] colSpecs,
    final ExecutionContext exec) {
    final BufferedDataContainer container = exec.createDataContainer(createOutputSpec(tableSpec));

    // the row keys are placed at the same index as the statistic they label
    final RowKey[] statisticKeys = new RowKey[BoxPlotNodeModel.SIZE];
    statisticKeys[BoxPlotNodeModel.MIN] = new RowKey("Minimum");
    statisticKeys[BoxPlotNodeModel.LOWER_WHISKER] = new RowKey("Lower Whisker");
    statisticKeys[BoxPlotNodeModel.LOWER_QUARTILE] = new RowKey("Lower Quartile");
    statisticKeys[BoxPlotNodeModel.MEDIAN] = new RowKey("Median");
    statisticKeys[BoxPlotNodeModel.UPPER_QUARTILE] = new RowKey("Upper Quartile");
    statisticKeys[BoxPlotNodeModel.UPPER_WHISKER] = new RowKey("Upper Whisker");
    statisticKeys[BoxPlotNodeModel.MAX] = new RowKey("Maximum");

    final int columnCount = container.getTableSpec().getNumColumns();
    for (int statistic = 0; statistic < statisticKeys.length; statistic++) {
        final DataCell[] rowCells = new DataCell[columnCount];
        for (int column = 0; column < columnCount; column++) {
            final double[] columnStats = m_statistics.get(colSpecs[column]);
            // columns without collected statistics are filled with missing cells
            rowCells[column] =
                columnStats == null ? DataType.getMissingCell() : new DoubleCell(columnStats[statistic]);
        }
        container.addRowToTable(new DefaultRow(statisticKeys[statistic], rowCells));
    }

    container.close();
    return container;
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class EnrichmentPlotterModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>
 * For every curve configured in the settings, the rows of the first input table are collected (rows with a
 * missing value in the sort column are skipped), sorted, and reversed if the curve is sorted descending. The
 * activity column is then accumulated along that order according to the configured plot mode. The resulting
 * curve is down-sampled to at most {@code MAX_RESOLUTION} points and stored together with its normalized
 * area. One row per curve is added to each output table: the first table holds the area, the second holds the
 * hit rates followed by the enrichment factors for all configured fraction sizes.
 * </p>
 *
 * <p>
 * Note: the area and the hit rates are divided by the final accumulated value {@code y} without a guard, so
 * they are not finite numbers if nothing was accumulated (e.g. no hits or no usable rows).
 * </p>
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(getDiscrateOutSpec());
    final double[] fractionSizes = m_settings.getFractionSizes();
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        // k counts all rows seen (for progress), maxK only the rows that made it into the curve
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            if (c1.isMissing()) {
                continue;
            } else {
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
            }
            maxK++;
        }
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            // reverse the sorted prefix in place
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[fractionSizes.length];
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        // set hit rate values for fractions that are smaller than 1 row to 0;
        // the index check keeps the loop inside the array when every fraction is smaller than one row
        // (e.g. maxK == 0 because the table is empty or all sort values are missing)
        while ((nextHitRatePoint < fractionSizes.length)
            && ((maxK * fractionSizes[nextHitRatePoint] / 100) < 1)) {
            hitRateValues[nextHitRatePoint++] = 0;
        }
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                // a missing activity adds nothing to the sum; like the other plot modes, do not cast it
                if (!h.b.isMissing()) {
                    y += ((DoubleValue) h.b).getDoubleValue();
                }
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                // cluster mode: a cluster counts once, at the moment it reaches the minimum member count
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            area += y / maxK;
            // record a sample point whenever the down-sampled index advances
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            // several fraction sizes can map to the same row index k,
            // that is why this needs to be a while and not an if
            while ((nextHitRatePoint < fractionSizes.length)
                && (k == (int) Math.floor(maxK * fractionSizes[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        // the last sample point always is the end of the curve
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        double[] enrichmentFactors = new double[hitRateValues.length];
        for (int j = 0; j < enrichmentFactors.length; j++) {
            enrichmentFactors[j] = calculateEnrichmentFactor(hitRateValues[j], fractionSizes[j]);
        }
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), ArrayUtils.addAll(hitRateValues, enrichmentFactors)));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class UngroupOperation2 method compute.
/**
 * Ungroups the collection columns of the given table and returns the result as a new table.
 * An empty input table yields an empty output table with the ungrouped table spec.
 *
 * @param exec the execution context used to create the output container and passed on to the row-based
 *            computation
 * @param table the table whose collection columns are ungrouped
 * @param trans the hilite translator, will be modified directly. Must be non-null if hiliting is enabled, can
 *            be <code>null</code> otherwise
 * @return the table with the ungrouped collections
 * @throws CanceledExecutionException if the execution has been canceled
 * @throws InterruptedException if the execution has been interrupted
 * @throws InvalidSettingsException thrown if the table doesn't contain a collection column at one of the column
 *             indices to be ungrouped
 * @throws IllegalArgumentException if hiliting is enabled and no hilite translator is given
 */
public BufferedDataTable compute(final ExecutionContext exec, final BufferedDataTable table, final HiLiteTranslator trans) throws CanceledExecutionException, InterruptedException, InvalidSettingsException {
    final BufferedDataContainer container =
        exec.createDataContainer(createTableSpec(table.getDataTableSpec(), m_removeCollectionCol, m_colIndices));

    if (table.size() != 0) {
        final DataTableRowInput rowInput = new DataTableRowInput(table);
        final BufferedDataTableRowOutput rowOutput = new BufferedDataTableRowOutput(container);
        try {
            compute(rowInput, rowOutput, exec, table.size(), trans);
        } finally {
            // input and output are closed regardless of how the computation ended
            rowInput.close();
            rowOutput.close();
        }
        return rowOutput.getDataTable();
    }

    // nothing to ungroup: hand back the empty table
    container.close();
    return container.getTable();
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class ROCCalculator method calculateCurveData.
/**
 * Computes the ROC curve and the area under it for every configured score column. The curves are stored
 * in {@code m_outCurves} and the areas (one row per curve) in {@code m_outTable}. A warning message is
 * recorded if the table is empty or if missing values are encountered and not ignored.
 *
 * @param table the table with the data
 * @param exec the execution context to use for reporting progress
 * @throws CanceledExecutionException when the user cancels the execution
 */
public void calculateCurveData(final BufferedDataTable table, final ExecutionContext exec) throws CanceledExecutionException {
    m_warningMessage = null;
    final List<ROCCurve> computedCurves = new ArrayList<ROCCurve>();
    final int classColIndex = table.getDataTableSpec().findColumnIndex(m_classCol);
    final int curveCount = m_curves.size();
    final int rowCount = table.getRowCount();
    if (rowCount == 0) {
        m_warningMessage = "Input table contains no rows";
    }
    final BufferedDataContainer areaContainer = exec.createDataContainer(OUT_SPEC);

    for (int curveIdx = 0; curveIdx < curveCount; curveIdx++) {
        exec.checkCanceled();
        final String scoreColumn = m_curves.get(curveIdx);

        // sort the table by the score column; the sort flag passed for that column is false
        final ExecutionContext sortExec = exec.createSubExecutionContext(1.0 / curveCount);
        final SortedTable sorted =
            new SortedTable(table, Collections.singletonList(scoreColumn), new boolean[] { false }, sortExec);
        sortExec.setProgress(1.0);

        int truePositives = 0;
        int falsePositives = 0;
        // these contain the coordinates for the plot; index 0 stays at the origin
        double[] xValues = new double[rowCount + 1];
        double[] yValues = new double[rowCount + 1];
        int lastPoint = 0;
        final int scoreColIndex = sorted.getDataTableSpec().findColumnIndex(scoreColumn);
        DataCell previousScore = null;

        for (final DataRow row : sorted) {
            exec.checkCanceled();
            final DataCell actualClass = row.getCell(classColIndex);
            if (actualClass.isMissing() || row.getCell(scoreColIndex).isMissing()) {
                if (!m_ignoreMissingValues) {
                    // the row is still processed, only a warning is recorded
                    m_warningMessage = "Table contains missing values.";
                } else {
                    continue;
                }
            }

            final boolean isPositive = actualClass.toString().equals(m_posClass);
            if (isPositive) {
                truePositives++;
            } else {
                falsePositives++;
            }

            // rows sharing the same score end up in one plot point: a new point is only started when
            // the score differs from the previous row's score, otherwise the current point is overwritten
            final DataCell score = row.getCell(scoreColIndex);
            if (!score.equals(previousScore)) {
                lastPoint++;
                previousScore = score;
            }
            xValues[lastPoint] = falsePositives;
            yValues[lastPoint] = truePositives;
        }

        // cut off unused points and scale the counts by the totals
        xValues = Arrays.copyOf(xValues, lastPoint + 1);
        yValues = Arrays.copyOf(yValues, lastPoint + 1);
        for (int p = 0; p <= lastPoint; p++) {
            xValues[p] = xValues[p] / falsePositives;
            yValues[p] = yValues[p] / truePositives;
        }
        xValues[xValues.length - 1] = 1;
        yValues[yValues.length - 1] = 1;

        // trapezoidal rule: the rectangle plus the triangle under each segment that advances in x
        double area = 0;
        for (int p = 1; p < xValues.length; p++) {
            if (xValues[p - 1] < xValues[p]) {
                area += 0.5 * (xValues[p] - xValues[p - 1]) * (yValues[p] + yValues[p - 1]);
            }
        }

        computedCurves.add(new ROCCurve(scoreColumn, xValues, yValues, area, m_maxPoints));
        areaContainer.addRowToTable(new DefaultRow(new RowKey(scoreColumn), new DoubleCell(area)));
    }

    m_outCurves = computedCurves;
    areaContainer.close();
    m_outTable = areaContainer.getTable();
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class ValueCounterNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>
 * Counts how often each distinct cell of the configured column occurs in the first input table and returns
 * a table with one row per distinct value (ordered by the column type's comparator) holding that count.
 * If hiliting is enabled, the keys of the input rows are additionally mapped to the output row they were
 * counted into.
 * </p>
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    final int colIndex = input.getDataTableSpec().findColumnIndex(m_settings.columnName());
    final double totalRows = input.getRowCount();

    final Map<DataCell, Set<RowKey>> rowKeysPerValue = new HashMap<DataCell, Set<RowKey>>();
    final Map<DataCell, MutableInteger> occurrences = new HashMap<DataCell, MutableInteger>();

    int processed = 0;
    for (final DataRow row : input) {
        exec.checkCanceled();
        exec.setProgress(processed / totalRows, occurrences.size() + " different values found");
        processed++;

        final DataCell value = row.getCell(colIndex);
        MutableInteger counter = occurrences.get(value);
        if (counter == null) {
            counter = new MutableInteger(0);
            occurrences.put(value, counter);
        }
        counter.inc();

        if (m_settings.hiliting()) {
            // remember which input rows contributed to this value
            Set<RowKey> contributingKeys = rowKeysPerValue.get(value);
            if (contributingKeys == null) {
                contributingKeys = new HashSet<RowKey>();
                rowKeysPerValue.put(value, contributingKeys);
            }
            contributingKeys.add(row.getKey());
        }
    }

    // order the distinct values with the comparator of the counted column's type
    final DataValueComparator valueComparator =
        input.getDataTableSpec().getColumnSpec(colIndex).getType().getComparator();
    final List<Map.Entry<DataCell, MutableInteger>> ordered =
        new ArrayList<Map.Entry<DataCell, MutableInteger>>(occurrences.entrySet());
    Collections.sort(ordered, new Comparator<Map.Entry<DataCell, MutableInteger>>() {
        @Override
        public int compare(final Map.Entry<DataCell, MutableInteger> left,
            final Map.Entry<DataCell, MutableInteger> right) {
            return valueComparator.compare(left.getKey(), right.getKey());
        }
    });

    // one output row per distinct value, keyed by the value's string representation
    final BufferedDataContainer container = exec.createDataContainer(TABLE_SPEC);
    for (final Map.Entry<DataCell, MutableInteger> valueCount : ordered) {
        final RowKey outputKey = new RowKey(valueCount.getKey().toString());
        container.addRowToTable(new DefaultRow(outputKey, new int[] { valueCount.getValue().intValue() }));
    }
    container.close();

    if (m_settings.hiliting()) {
        // translate the per-value key sets to the row keys used in the output table
        final Map<RowKey, Set<RowKey>> hiliteMapping = new HashMap<RowKey, Set<RowKey>>();
        for (final Map.Entry<DataCell, Set<RowKey>> valueKeys : rowKeysPerValue.entrySet()) {
            final RowKey outputKey = new RowKey(valueKeys.getKey().toString());
            hiliteMapping.put(outputKey, valueKeys.getValue());
        }
        m_translator.setMapper(new DefaultHiLiteMapper(hiliteMapping));
    }

    return new BufferedDataTable[] { container.getTable() };
}
Aggregations