Use of org.knime.base.data.statistics.HistogramModel in project knime-core by knime.
The method execute of the class ExtendedStatisticsNodeModel.
/**
 * {@inheritDoc}
 *
 * <p>Builds a {@code Statistics3Table} from the first input table and returns three tables:
 * <ul>
 * <li>index 0: the statistics-in-columns table after it has been passed through
 * {@code HistogramColumn.process},</li>
 * <li>index 1: the table returned by {@code HistogramColumn.nominalTable},</li>
 * <li>index 2: the nominal value table with its spec replaced by the result of
 * {@code renamedOccurrencesSpec}.</li>
 * </ul>
 * When HiLite is enabled, the projected sub table, the histograms, the buckets and the nominal keys
 * are additionally stored in the corresponding member fields; otherwise only the histograms returned
 * by {@code process} are stored.
 *
 * @throws CanceledExecutionException if the execution is canceled (propagated from the called steps)
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws CanceledExecutionException {
    // NOTE(review): the progress fractions requested from exec below
    // (initPercent + 0.5 + initPercent + 0.5) add up to more than 1 -- confirm this is intended.
    final double initPercent;
    if (m_enableHiLite.getBooleanValue()) {
        initPercent = .25;
    } else {
        initPercent = .2;
    }
    final ExecutionContext statsExec = exec.createSubExecutionContext(initPercent);
    final DataTableSpec dataSpec = inData[0].getDataTableSpec();
    final List<String> includes = nominalColumns(dataSpec);
    m_statTable = new Statistics3Table(inData[0], m_computeMedian.getBooleanValue(), numOfNominalValuesOutput(),
        includes, statsExec);
    final String warning = getStatTable().getWarning();
    if (warning != null) {
        setWarningMessage(warning);
    }

    final BufferedDataTable[] result = new BufferedDataTable[3];

    // Third output: nominal value table, re-labelled with the renamed spec.
    final BufferedDataTable occurrences = exec.createBufferedDataTable(
        getStatTable().createNominalValueTable(includes), exec.createSubProgress(0.5));
    final DataTableSpec occurrencesSpec = renamedOccurrencesSpec(occurrences.getSpec());
    result[2] = exec.createSpecReplacerTable(occurrences, occurrencesSpec);

    // First output (preliminary): statistics in columns; replaced further below by the processed table.
    final ExecutionContext tableExec = exec.createSubExecutionContext(initPercent);
    result[0] = getStatTable().createStatisticsInColumnsTable(tableExec);

    final ExecutionContext histogramExec = exec.createSubExecutionContext(1.0 / 2);
    final HistogramColumn histogramColumn = createHistogramColumn();
    final HiLiteHandler hlHandler;
    if (getEnableHiLite().getBooleanValue()) {
        hlHandler = getInHiLiteHandler(0);
    } else {
        hlHandler = new HiLiteHandler();
    }

    final double[] mins = getStatTable().getMin();
    final double[] maxes = getStatTable().getMax();
    final double[] means = getStatTable().getMean();
    overwriteWithNonInfExtremes(mins, maxes);

    final Pair<BufferedDataTable, Map<Integer, ? extends HistogramModel<?>>> processed = histogramColumn.process(
        histogramExec, inData[0], hlHandler, result[0], mins, maxes, means, numOfNominalValues(), getColumnNames());
    result[0] = processed.getFirst();
    result[1] = histogramColumn.nominalTable(getStatTable(), hlHandler, exec, numOfNominalValues());

    if (!m_enableHiLite.getBooleanValue()) {
        // Without HiLite only the histograms computed by process(...) are kept.
        m_histograms = processed.getSecond();
        return result;
    }

    // NOTE(review): with initPercent == .25 in this branch, remaining evaluates to 0 -- confirm.
    final double remaining = 1 - initPercent * 2 - 1.0 / 2;
    final ExecutionContext projectionExec = exec.createSubExecutionContext(remaining / 2);
    final ColumnRearranger rearranger = new ColumnRearranger(dataSpec);
    final Set<String> selectedNames = new HashSet<String>(Arrays.asList(getColumnNames()));
    for (DataColumnSpec colSpec : rearranger.createSpec()) {
        // Keep only selected columns that are compatible with DoubleValue or NominalValue.
        final boolean supportedType = colSpec.getType().isCompatible(DoubleValue.class)
            || colSpec.getType().isCompatible(NominalValue.class);
        if (!(supportedType && selectedNames.contains(colSpec.getName()))) {
            rearranger.remove(colSpec.getName());
        }
    }
    final ExecutionContext saveExec = exec.createSubExecutionContext(remaining / 2);
    m_subTable = new DefaultDataArray(projectionExec.createColumnRearrangeTable(inData[0], rearranger, projectionExec),
        1, inData[0].getRowCount(), saveExec);
    m_histograms = histogramColumn.histograms(inData[0], getInHiLiteHandler(0), mins, maxes, means, getColumnNames());

    final Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> bucketsAndNominals =
        HistogramColumn.construct(m_histograms, m_subTable, columnsWithNominalValues(inData[0].getSpec()));
    m_buckets = bucketsAndNominals.getFirst();
    m_nominalKeys = bucketsAndNominals.getSecond();
    return result;
}

/**
 * Overwrites every entry of both arrays (in place) with the value of the corresponding
 * {@code getNonInfMin}/{@code getNonInfMax} cell of the statistics table, or {@code NaN} if that cell is missing.
 */
private void overwriteWithNonInfExtremes(final double[] mins, final double[] maxes) {
    for (int col = 0; col < maxes.length; col++) {
        final DataCell minCell = getStatTable().getNonInfMin(col);
        mins[col] = minCell.isMissing() ? Double.NaN : ((DoubleValue) minCell).getDoubleValue();
        final DataCell maxCell = getStatTable().getNonInfMax(col);
        maxes[col] = maxCell.isMissing() ? Double.NaN : ((DoubleValue) maxCell).getDoubleValue();
    }
}

/**
 * Collects, in column order, the names of the columns for which the statistics table reports
 * non-{@code null} nominal values.
 */
private Set<String> columnsWithNominalValues(final DataTableSpec inputSpec) {
    final Set<String> names = new LinkedHashSet<String>();
    for (int col = 0; col < inputSpec.getNumColumns(); ++col) {
        if (getStatTable().getNominalValues(col) != null) {
            names.add(inputSpec.getColumnSpec(col).getName());
        }
    }
    return names;
}
Aggregations