Use of org.knime.core.util.MutableInteger in project knime-core by KNIME:
class OneWayANOVAStatistics, method getGroupsTotalStatistics.
/**
 * Get descriptive statistics for all groups.
 * @return the descriptive statistics for all groups
 */
public List<DataCell> getGroupsTotalStatistics() {
    final List<DataCell> cells = new ArrayList<DataCell>();
    cells.add(new StringCell(m_column));
    cells.add(new StringCell("Total"));
    final SummaryStatistics stats = m_stats;
    cells.add(new IntCell((int) stats.getN()));
    // Total number of missing cells, summed over the per-group counters.
    int totalMissing = 0;
    for (final MutableInteger perGroupMissing : m_missing) {
        totalMissing += perGroupMissing.intValue();
    }
    cells.add(new IntCell(totalMissing));
    cells.add(new IntCell(m_missingGroup.intValue()));
    final double mean = stats.getMean();
    final double standardError = StatsUtil.getStandardError(stats);
    cells.add(new DoubleCell(mean));
    cells.add(new DoubleCell(stats.getStandardDeviation()));
    cells.add(new DoubleCell(standardError));
    cells.add(new DoubleCell(m_confidenceIntervalProp));
    // Two-sided confidence interval of the mean based on Student's
    // t-distribution with N - 1 degrees of freedom.
    final long degreesOfFreedom = stats.getN() - 1;
    final TDistribution distribution = new TDistribution(degreesOfFreedom);
    final double tValue = FastMath.abs(distribution.inverseCumulativeProbability((1 - m_confidenceIntervalProp) / 2));
    final double confidenceDelta = tValue * standardError;
    cells.add(new DoubleCell(mean - confidenceDelta));
    cells.add(new DoubleCell(mean + confidenceDelta));
    cells.add(new DoubleCell(stats.getMin()));
    cells.add(new DoubleCell(stats.getMax()));
    return cells;
}
Use of org.knime.core.util.MutableInteger in project knime-core by KNIME:
class EnrichmentPlotterModel, method execute.
/**
 * {@inheritDoc}
 *
 * <p>Builds one enrichment curve per configured curve: rows of the first
 * input table are sorted by the curve's sort column, a cumulative activity
 * value {@code y} is accumulated while walking the sorted rows, and the
 * resulting curve is down-sampled to at most {@code MAX_RESOLUTION} points
 * for faster display. Two output tables are filled as well: the normalized
 * area under each curve and the discovery rates at the percentages listed
 * in {@code DISCRATE_POINTS}.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
final double rowCount = inData[0].size();
final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
for (int i = 0; i < m_settings.getCurveCount(); i++) {
// each curve gets an equal share of the overall progress
final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
exec.setMessage("Generating curve " + (i + 1));
final Curve c = m_settings.getCurve(i);
// curve holds one Helper (sort value, activity cell) per non-missing row;
// maxK counts how many slots are actually filled
final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
int k = 0, maxK = 0;
for (DataRow row : inData[0]) {
DataCell c1 = row.getCell(sortIndex);
DataCell c2 = row.getCell(actIndex);
// check cancellation / report progress only every 100 rows
if (k++ % 100 == 0) {
sexec.checkCanceled();
sexec.setProgress(k / rowCount);
}
// rows with a missing sort value are skipped entirely; note the continue
// also bypasses the maxK++ below, so they occupy no slot in curve
if (c1.isMissing()) {
continue;
} else {
curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
}
maxK++;
}
// sort only the filled prefix [0, maxK) ascending by the sort value
Arrays.sort(curve, 0, maxK);
if (c.isSortDescending()) {
// reverse the filled prefix in place for descending order
for (int j = 0; j < maxK / 2; j++) {
Helper h = curve[j];
curve[j] = curve[maxK - j - 1];
curve[maxK - j - 1] = h;
}
}
// this is for down-sampling so that the view is faster;
// plotting >100,000 points takes quite a long time
final int size = Math.min(MAX_RESOLUTION, maxK);
final double downSampleRate = maxK / (double) size;
// one extra slot for the explicitly-set final point (maxK, y)
final double[] xValues = new double[size + 1];
final double[] yValues = new double[size + 1];
xValues[0] = 0;
yValues[0] = 0;
int lastK = 0;
double y = 0, area = 0;
int nextHitRatePoint = 0;
final double[] hitRateValues = new double[DISCRATE_POINTS.length];
// per-activity-value occurrence counters; only used in the cluster mode
final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
for (k = 1; k <= maxK; k++) {
final Helper h = curve[k - 1];
if (m_settings.plotMode() == PlotMode.PlotSum) {
// sum mode: accumulate the raw activity values
y += ((DoubleValue) h.b).getDoubleValue();
} else if (m_settings.plotMode() == PlotMode.PlotHits) {
// hit mode: count rows whose activity reaches the hit threshold
if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
y++;
}
} else if (!h.b.isMissing()) {
// cluster mode: a cluster counts once its member count first reaches
// the configured minimum (inc() returns the incremented count)
MutableInteger count = clusters.get(h.b);
if (count == null) {
count = new MutableInteger(0);
clusters.put(h.b, count);
}
if (count.inc() == m_settings.minClusterMembers()) {
y++;
}
}
// running (not yet normalized) area under the curve
area += y / maxK;
// emit a down-sampled point whenever we cross the next sample boundary
if ((int) (k / downSampleRate) >= lastK + 1) {
lastK++;
xValues[lastK] = k;
yValues[lastK] = y;
}
// capture the discovery rate at each configured percentage of the rows
if ((nextHitRatePoint < DISCRATE_POINTS.length) && (k == (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
hitRateValues[nextHitRatePoint] = y;
nextHitRatePoint++;
}
}
// always close the curve with the exact final point
xValues[xValues.length - 1] = maxK;
yValues[yValues.length - 1] = y;
// normalize the area by the final cumulative value
// NOTE(review): if y is still 0 here (no hits at all) this yields NaN —
// confirm whether upstream settings rule that case out
area /= y;
m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
for (int j = 0; j < hitRateValues.length; j++) {
hitRateValues[j] /= y;
}
discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
}
areaOutCont.close();
discrateOutCont.close();
return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
Use of org.knime.core.util.MutableInteger in project knime-core by KNIME:
class ClassAttributeModel, method toString.
/**
 * {@inheritDoc}
 *
 * <p>Returns a human-readable dump of this model: the attribute name, the
 * total number of records, and one {@code classValue|count} line per class.
 */
@Override
public String toString() {
    final StringBuilder buf = new StringBuilder();
    buf.append("Attribute name: ");
    buf.append(getAttributeName());
    buf.append("\t");
    buf.append("No of records: ");
    buf.append(m_totalNoOfRecs);
    buf.append("\n");
    // Iterate over entries directly instead of keySet() + get() per key:
    // same output, but avoids a redundant map lookup for every class value.
    for (final Map.Entry<String, MutableInteger> entry : m_recsCounterByClassVal.entrySet()) {
        buf.append(entry.getKey());
        buf.append("|");
        buf.append(entry.getValue().intValue());
        buf.append("\n");
    }
    return buf.toString();
}
Use of org.knime.core.util.MutableInteger in project knime-core by KNIME:
class KnnNodeModel, method createRearranger.
/*
 * Creates a column rearranger. NOTE: This call possibly involves heavier calculations since the kd-tree is determined here based on the training data.
 * @param numRowsTable2 - can be -1 if can't be determined (streaming)
 */
private ColumnRearranger createRearranger(final BufferedDataTable trainData, final DataTableSpec inSpec2, final ExecutionContext exec, final long numRowsTable2) throws CanceledExecutionException, InvalidSettingsException {
    final int classColIndex = trainData.getDataTableSpec().findColumnIndex(m_settings.classColumn());
    if (classColIndex == -1) {
        throw new InvalidSettingsException("Invalid class column chosen.");
    }
    final List<Integer> featureColumns = new ArrayList<Integer>();
    final Map<Integer, Integer> firstToSecond = new HashMap<Integer, Integer>();
    checkInputTables(new DataTableSpec[] { trainData.getDataTableSpec(), inSpec2 }, featureColumns, firstToSecond);
    final KDTreeBuilder<DataCell> treeBuilder = new KDTreeBuilder<DataCell>(featureColumns.size());
    int count = 0;
    for (DataRow currentRow : trainData) {
        exec.checkCanceled();
        // BUG FIX: the original computed 0.1 * count * trainData.size() and
        // never incremented count, so the reported progress was stuck at 0
        // (and would have exceeded 1 otherwise). Reading the training data
        // accounts for the first 10% of this method's progress.
        exec.setProgress(0.1 * count++ / trainData.size(), "Reading row " + currentRow.getKey());
        final double[] features = createFeatureVector(currentRow, featureColumns);
        if (features == null) {
            // rows with missing feature values cannot be used for training
            setWarningMessage("Input table contains missing values, the " + "affected rows are ignored.");
        } else {
            final DataCell thisClassCell = currentRow.getCell(classColIndex);
            // and finally add data
            treeBuilder.addPattern(features, thisClassCell);
            // compute the majority class for breaking possible ties later
            final MutableInteger t = m_classDistribution.get(thisClassCell);
            if (t == null) {
                m_classDistribution.put(thisClassCell, new MutableInteger(1));
            } else {
                t.inc();
            }
        }
    }
    // and now use it to classify the test data...
    final DataColumnSpec classColumnSpec = trainData.getDataTableSpec().getColumnSpec(classColIndex);
    exec.setMessage("Building kd-tree");
    // building the kd-tree accounts for the next 30% of the progress
    final KDTree<DataCell> tree = treeBuilder.buildTree(exec.createSubProgress(0.3));
    if (tree.size() < m_settings.k()) {
        setWarningMessage("There are only " + tree.size() + " patterns in the input table, but " + m_settings.k() + " nearest neighbours were requested for classification." + " The prediction will be the majority class for all" + " input patterns.");
    }
    exec.setMessage("Classifying");
    return createRearranger(inSpec2, classColumnSpec, featureColumns, firstToSecond, tree, numRowsTable2);
}
Use of org.knime.core.util.MutableInteger in project knime-core by KNIME:
class BigGroupByTable, method createGroupByTable.
/**
 * {@inheritDoc}
 *
 * <p>Implementation note: the input table is first sorted by the group
 * columns so that all rows of a group arrive consecutively. Groups can then
 * be aggregated chunk by chunk without holding every group in memory. A
 * "chunk" is a run of rows whose group values compare equal (comparator
 * result 0); because non-equal cells can in rare cases also compare to 0,
 * each chunk keeps a map from the exact group key to its aggregators.
 */
@Override
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
final DataTableSpec origSpec = table.getDataTableSpec();
// sort the data table in order to process the input table chunk wise
final BufferedDataTable sortedTable;
final ExecutionContext groupExec;
final DataValueComparator[] comparators;
if (groupColIdx.length < 1) {
// no group columns: the whole table forms a single group, no sort needed
sortedTable = table;
groupExec = exec;
comparators = new DataValueComparator[0];
} else {
// sorting gets 60% of the progress, the grouping pass the remaining 40%
final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
exec.setMessage("Sorting input table...");
sortedTable = sortTable(sortExec, table, getGroupCols());
sortExec.setProgress(1.0);
groupExec = exec.createSubExecutionContext(0.4);
comparators = new DataValueComparator[groupColIdx.length];
for (int i = 0, length = groupColIdx.length; i < length; i++) {
// one type-specific comparator per group column for chunk detection
final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
comparators[i] = colSpec.getType().getComparator();
}
}
final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
exec.setMessage("Creating groups");
// group values of the previous and current row; compared pairwise to
// detect when a new chunk starts
final DataCell[] previousGroup = new DataCell[groupColIdx.length];
final DataCell[] currentGroup = new DataCell[groupColIdx.length];
// running counter shared with createTableRows to generate result row keys
final MutableInteger groupCounter = new MutableInteger(0);
boolean firstRow = true;
final double numOfRows = sortedTable.size();
long rowCounter = 0;
// In the rare case that the DataCell comparator return 0 for two
// data cells that are not equal we have to maintain a map with all
// rows with equal cells in the group columns per chunk.
// This variable stores for each chunk these members. A chunk consists
// of rows which return 0 for the pairwise group value comparison.
// Usually only equal data cells return 0 when compared with each other
// but in rare occasions also data cells that are NOT equal return 0 when
// compared to each other
// (such as cells that contain chemical structures).
// In this rare case this map will contain for each group of data cells
// that are pairwise equal in the chunk a separate entry.
final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
boolean logUnusualCells = true;
String groupLabel = "";
// cannot put init to the constructor, as the super() constructor directly calls the current function
initMissingValuesMap();
for (final DataRow row : sortedTable) {
// fetch the current group column values
for (int i = 0, length = groupColIdx.length; i < length; i++) {
currentGroup[i] = row.getCell(groupColIdx[i]);
}
if (firstRow) {
groupLabel = createGroupLabelForProgress(currentGroup);
System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
firstRow = false;
}
// group column data cells
if (!sameChunk(comparators, previousGroup, currentGroup)) {
// chunk boundary reached: flush the finished chunk to the container
groupLabel = createGroupLabelForProgress(currentGroup);
createTableRows(dc, chunkMembers, groupCounter);
// set the current group as previous group
System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
// log (once) when a chunk held more than one distinct group key, i.e.
// non-equal cells compared to 0 — see the comment on chunkMembers
if (logUnusualCells && chunkMembers.size() > 1) {
// cause the problem
if (LOGGER.isEnabledFor(LEVEL.INFO)) {
final StringBuilder buf = new StringBuilder();
buf.append("Data chunk with ");
buf.append(chunkMembers.size());
buf.append(" members occured in groupby node. " + "Involved classes are: ");
final GroupKey key = chunkMembers.keySet().iterator().next();
for (final DataCell cell : key.getGroupVals()) {
buf.append(cell.getClass().getCanonicalName());
buf.append(", ");
}
LOGGER.info(buf.toString());
}
logUnusualCells = false;
}
// reset the chunk members map
chunkMembers.clear();
}
// process the row as one of the members of the current chunk
Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
if (member == null) {
// first row with this exact group key in the chunk: set up aggregators
// and (if hiliting is enabled) a row-key set for hilite translation
Set<RowKey> rowKeys;
if (isEnableHilite()) {
rowKeys = new HashSet<>();
} else {
rowKeys = Collections.emptySet();
}
member = new Pair<>(cloneColumnAggregators(), rowKeys);
// copy currentGroup since its array is reused for every row
final DataCell[] groupKeys = new DataCell[currentGroup.length];
System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
chunkMembers.put(new GroupKey(groupKeys), member);
}
// compute the current row values
for (final ColumnAggregator colAggr : member.getFirst()) {
final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
}
if (isEnableHilite()) {
member.getSecond().add(row.getKey());
}
groupExec.checkCanceled();
groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
}
// create the final row for the last chunk after processing the last
// table row
createTableRows(dc, chunkMembers, groupCounter);
dc.close();
return dc.getTable();
}
Aggregations