Search in sources :

Example 1 with HistogramColumn

Use of org.knime.base.data.statistics.HistogramColumn in project knime-core by knime.

The configure method of the class ExtendedStatisticsNodeModel.

/**
 * {@inheritDoc}
 *
 * <p>
 * Builds the three output specs from the first input spec: the statistics specification extended by the
 * histogram column (port 0), the nominal histogram table spec (port 1) and the renamed occurrences spec of
 * the selected nominal columns (port 2).
 */
@SuppressWarnings({ "unchecked", "deprecation" })
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec tableSpec = inSpecs[0];
    if (!m_hasSettings) {
        // No settings loaded so far: initialise the nominal column filter with its defaults.
        final DataTypeColumnFilter defaultTypes = new DataTypeColumnFilter(NominalValue.class, StringValue.class, IntValue.class, LongValue.class, BooleanValue.class);
        m_nominalFilter.loadDefaults(tableSpec, defaultTypes, true);
    }

    // Port 2: occurrences spec for the columns currently included by the nominal filter.
    final List<String> nominalColumnNames = Arrays.asList(m_nominalFilter.applyTo(tableSpec).getIncludes());
    final DataTableSpec occurrencesSpec = renamedOccurrencesSpec(Statistics3Table.createOutSpecNominal(tableSpec, nominalColumnNames));

    // Port 0: the statistics specification plus the histogram column.
    final DataTableSpecCreator statisticsSpecCreator = new DataTableSpecCreator(Statistics3Table.getStatisticsSpecification());
    final HistogramColumn histogramColumn = createHistogramColumn();
    statisticsSpecCreator.addColumns(histogramColumn.createHistogramColumnSpec());

    return new DataTableSpec[] {
        statisticsSpecCreator.createSpec(),
        histogramColumn.createNominalHistogramTableSpec(),
        occurrencesSpec
    };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataTypeColumnFilter(org.knime.core.node.util.filter.column.DataTypeColumnFilter) NominalValue(org.knime.core.data.NominalValue) DataTableSpecCreator(org.knime.core.data.DataTableSpecCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) HistogramColumn(org.knime.base.data.statistics.HistogramColumn) DataColumnSpec(org.knime.core.data.DataColumnSpec) BooleanValue(org.knime.core.data.BooleanValue) LongValue(org.knime.core.data.LongValue) StringValue(org.knime.core.data.StringValue) IntValue(org.knime.core.data.IntValue)

Example 2 with HistogramColumn

Use of org.knime.base.data.statistics.HistogramColumn in project knime-core by knime.

The execute method of the class ExtendedStatisticsNodeModel.

/**
 * {@inheritDoc}
 *
 * <p>
 * Computes the statistics of the first input table and returns three tables: the statistics table with the
 * histogram information added by {@code HistogramColumn#process} (index 0), the nominal histogram table
 * (index 1) and the nominal value occurrences table with its spec renamed (index 2). As a side effect the
 * fields {@code m_statTable} and {@code m_histograms} are set; when HiLite is enabled {@code m_subTable},
 * {@code m_buckets} and {@code m_nominalKeys} are set as well.
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec the execution context used to create the output tables and the sub-contexts for progress
 * @return an array of three tables as described above
 * @throws CanceledExecutionException declared for the table-creating and statistics calls made with
 *             {@code exec} and its sub-contexts
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws CanceledExecutionException {
    // Fraction of the progress given to the statistics computation (and, below, to the statistics table).
    // NOTE(review): the fractions handed out in this method are initPercent + 0.5 + initPercent + 0.5, which
    // is more than 1.0, and "rest" in the HiLite branch evaluates to 0 - confirm that this is intended.
    double initPercent = m_enableHiLite.getBooleanValue() ? .25 : .2;
    ExecutionContext init = exec.createSubExecutionContext(initPercent);
    DataTableSpec dataSpec = inData[0].getDataTableSpec();
    List<String> includes = nominalColumns(dataSpec);
    m_statTable = new Statistics3Table(inData[0], m_computeMedian.getBooleanValue(), numOfNominalValuesOutput(), includes, init);
    // Forward a warning of the statistics computation to the node.
    if (getStatTable().getWarning() != null) {
        setWarningMessage(getStatTable().getWarning());
    }
    // Third output: nominal value table of the included columns, with its spec replaced by the renamed one.
    BufferedDataTable outTableOccurrences = exec.createBufferedDataTable(getStatTable().createNominalValueTable(includes), exec.createSubProgress(0.5));
    BufferedDataTable[] ret = new BufferedDataTable[3];
    DataTableSpec newSpec = renamedOccurrencesSpec(outTableOccurrences.getSpec());
    ret[2] = exec.createSpecReplacerTable(outTableOccurrences, newSpec);
    // First output (preliminary): statistics in columns; it is passed to histogramColumn.process and
    // replaced by that call's result further down.
    ExecutionContext table = exec.createSubExecutionContext(initPercent);
    ret[0] = getStatTable().createStatisticsInColumnsTable(table);
    ExecutionContext histogram = exec.createSubExecutionContext(1.0 / 2);
    final HistogramColumn histogramColumn = createHistogramColumn();
    // With HiLite disabled a fresh handler is used instead of the handler of input port 0.
    HiLiteHandler hlHandler = getEnableHiLite().getBooleanValue() ? getInHiLiteHandler(0) : new HiLiteHandler();
    double[] mins = getStatTable().getMin(), maxes = getStatTable().getMax(), means = getStatTable().getMean();
    // Overwrite the minima/maxima in place with the "non-inf" values; a missing cell becomes NaN.
    // NOTE(review): this writes into the arrays returned by the statistics table - whether those are
    // copies or its internal state is not visible here; confirm.
    for (int i = 0; i < maxes.length; i++) {
        DataCell min = getStatTable().getNonInfMin(i);
        if (min.isMissing()) {
            mins[i] = Double.NaN;
        } else {
            mins[i] = ((DoubleValue) min).getDoubleValue();
        }
        DataCell max = getStatTable().getNonInfMax(i);
        if (max.isMissing()) {
            maxes[i] = Double.NaN;
        } else {
            maxes[i] = ((DoubleValue) max).getDoubleValue();
        }
    }
    // Returns the table that becomes the first output together with the histogram models per index.
    Pair<BufferedDataTable, Map<Integer, ? extends HistogramModel<?>>> pair = histogramColumn.process(histogram, inData[0], hlHandler, ret[0], mins, maxes, means, numOfNominalValues(), getColumnNames());
    // final BufferedDataTable outTable =
    // histogramColumn.appendNominal(pair.getFirst(), getStatTable(), hlHandler, exec, numOfNominalValues());
    ret[0] = pair.getFirst();
    ret[1] = histogramColumn.nominalTable(getStatTable(), hlHandler, exec, numOfNominalValues());
    if (m_enableHiLite.getBooleanValue()) {
        // HiLite enabled: keep a projection of the input and the row key mappings for buckets/nominal values.
        double rest = 1 - initPercent * 2 - 1.0 / 2;
        ExecutionContext projection = exec.createSubExecutionContext(rest / 2);
        ColumnRearranger rearranger = new ColumnRearranger(dataSpec);
        Set<String> colNames = new HashSet<String>(Arrays.asList(getColumnNames()));
        // Drop every column that is neither double- nor nominal-compatible, or that is not among
        // getColumnNames().
        for (DataColumnSpec spec : rearranger.createSpec()) {
            if ((!spec.getType().isCompatible(DoubleValue.class) && !spec.getType().isCompatible(NominalValue.class)) || !colNames.contains(spec.getName())) {
                rearranger.remove(spec.getName());
            }
        }
        ExecutionContext save = exec.createSubExecutionContext(rest / 2);
        m_subTable = new DefaultDataArray(projection.createColumnRearrangeTable(inData[0], rearranger, projection), 1, inData[0].getRowCount(), save);
        // The histograms are computed again here, using the HiLite handler of input port 0.
        m_histograms = histogramColumn.histograms(inData[0], getInHiLiteHandler(0), mins, maxes, means, getColumnNames());
        // Names of the input columns for which the statistics table holds nominal values, in column order.
        Set<String> nominalColumns = new LinkedHashSet<String>();
        for (int i = 0; i < inData[0].getSpec().getNumColumns(); ++i) {
            Map<DataCell, Integer> nominalValues = getStatTable().getNominalValues(i);
            if (nominalValues != null) {
                nominalColumns.add(inData[0].getSpec().getColumnSpec(i).getName());
            }
        }
        final Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> bucketsAndNominals = HistogramColumn.construct(m_histograms, m_subTable, nominalColumns);
        m_buckets = bucketsAndNominals.getFirst();
        m_nominalKeys = bucketsAndNominals.getSecond();
    } else {
        // HiLite disabled: reuse the histogram models already produced by process(...).
        m_histograms = pair.getSecond();
    }
    return ret;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) HiLiteHandler(org.knime.core.node.property.hilite.HiLiteHandler) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) DataValue(org.knime.core.data.DataValue) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) HistogramColumn(org.knime.base.data.statistics.HistogramColumn) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) HistogramModel(org.knime.base.data.statistics.HistogramModel) SettingsModelInteger(org.knime.core.node.defaultnodesettings.SettingsModelInteger) ExecutionContext(org.knime.core.node.ExecutionContext) Statistics3Table(org.knime.base.data.statistics.Statistics3Table) DataCell(org.knime.core.data.DataCell) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

HistogramColumn (org.knime.base.data.statistics.HistogramColumn)2 DataColumnSpec (org.knime.core.data.DataColumnSpec)2 DataTableSpec (org.knime.core.data.DataTableSpec)2 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)2 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 LinkedHashSet (java.util.LinkedHashSet)1 Map (java.util.Map)1 Set (java.util.Set)1 HistogramModel (org.knime.base.data.statistics.HistogramModel)1 Statistics3Table (org.knime.base.data.statistics.Statistics3Table)1 DefaultDataArray (org.knime.base.node.util.DefaultDataArray)1 BooleanValue (org.knime.core.data.BooleanValue)1 DataCell (org.knime.core.data.DataCell)1 DataTableSpecCreator (org.knime.core.data.DataTableSpecCreator)1 DataValue (org.knime.core.data.DataValue)1 IntValue (org.knime.core.data.IntValue)1 LongValue (org.knime.core.data.LongValue)1 NominalValue (org.knime.core.data.NominalValue)1 StringValue (org.knime.core.data.StringValue)1