Search in sources :

Example 6 with Statistics3Table

use of org.knime.base.data.statistics.Statistics3Table in project knime-core by knime.

the class LowVarFilter2NodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Produces a single output table holding the input columns minus those that are selected by the column
 * filter configuration, are compatible with {@link DoubleValue}, and whose variance is not greater than
 * the configured variance threshold. Columns that are not selected or not numeric are always kept.
 * </p>
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws CanceledExecutionException {
    final BufferedDataTable input = inData[0];
    if (m_conf == null) {
        // No filter configuration available yet: fall back to the auto-guessed one.
        m_conf = createColFilterConf();
    }
    final DataTableSpec inSpec = input.getDataTableSpec();
    // Names of the columns the user put under variance control.
    final HashSet<String> candidates = new HashSet<String>(Arrays.asList(m_conf.applyTo(inSpec).getIncludes()));
    // Statistics over the whole input; no median, no nominal value counting.
    final Statistics3Table stats = new Statistics3Table(input, false, 0, Collections.<String>emptyList(), exec);
    final double limit = m_varianceThreshold;
    final int total = inSpec.getNumColumns();
    final ArrayList<String> kept = new ArrayList<String>();
    for (int col = 0; col < total; col++) {
        final DataColumnSpec colSpec = inSpec.getColumnSpec(col);
        final String name = colSpec.getName();
        final boolean underControl = candidates.contains(name) && colSpec.getType().isCompatible(DoubleValue.class);
        // Deliberately phrased as a negated '>' so that a variance for which the comparison is false
        // (e.g. NaN) causes the column to be dropped, exactly like a variance at or below the limit.
        if (underControl && !(stats.getVariance(col) > limit)) {
            continue;
        }
        kept.add(name);
    }
    final int dropped = total - kept.size();
    LOGGER.info("Filtered out " + dropped + " column(s)");
    if (dropped == 0) {
        setWarningMessage("No columns were filtered out.");
    }
    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    rearranger.keepOnly(kept.toArray(new String[kept.size()]));
    return new BufferedDataTable[] { exec.createColumnRearrangeTable(input, rearranger, exec) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DoubleValue(org.knime.core.data.DoubleValue) Statistics3Table(org.knime.base.data.statistics.Statistics3Table) BufferedDataTable(org.knime.core.node.BufferedDataTable) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 7 with Statistics3Table

use of org.knime.base.data.statistics.Statistics3Table in project knime-core by knime.

the class ExtendedStatisticsNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Computes the statistics for the first input table and returns three tables:
 * index 0 holds the statistics-in-columns table as returned by the histogram processing step,
 * index 1 holds the nominal table produced by the histogram column helper, and
 * index 2 holds the nominal value occurrences table with a renamed spec.
 * As side effects the method stores the statistics table and the histogram models in member fields and,
 * when hiliting is enabled, also a projected copy of the input plus the bucket/nominal row-key lookups.
 * </p>
 *
 * @throws CanceledExecutionException
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws CanceledExecutionException {
    // Progress share used for both the statistics computation and the statistics table creation below.
    double initPercent = m_enableHiLite.getBooleanValue() ? .25 : .2;
    ExecutionContext init = exec.createSubExecutionContext(initPercent);
    DataTableSpec dataSpec = inData[0].getDataTableSpec();
    List<String> includes = nominalColumns(dataSpec);
    m_statTable = new Statistics3Table(inData[0], m_computeMedian.getBooleanValue(), numOfNominalValuesOutput(), includes, init);
    // Forward any warning raised while computing the statistics to the node.
    if (getStatTable().getWarning() != null) {
        setWarningMessage(getStatTable().getWarning());
    }
    // NOTE(review): the progress fractions handed out in this method (initPercent, 0.5, initPercent, 1.0 / 2)
    // add up to more than 1 — confirm this is intended.
    BufferedDataTable outTableOccurrences = exec.createBufferedDataTable(getStatTable().createNominalValueTable(includes), exec.createSubProgress(0.5));
    BufferedDataTable[] ret = new BufferedDataTable[3];
    // Third output: the occurrences table, data unchanged, spec replaced by the renamed one.
    DataTableSpec newSpec = renamedOccurrencesSpec(outTableOccurrences.getSpec());
    ret[2] = exec.createSpecReplacerTable(outTableOccurrences, newSpec);
    ExecutionContext table = exec.createSubExecutionContext(initPercent);
    // Preliminary first output; it is replaced below by the result of the histogram processing.
    ret[0] = getStatTable().createStatisticsInColumnsTable(table);
    ExecutionContext histogram = exec.createSubExecutionContext(1.0 / 2);
    final HistogramColumn histogramColumn = createHistogramColumn();
    // Without hiliting a fresh, unconnected handler is used instead of the one of input port 0.
    HiLiteHandler hlHandler = getEnableHiLite().getBooleanValue() ? getInHiLiteHandler(0) : new HiLiteHandler();
    double[] mins = getStatTable().getMin(), maxes = getStatTable().getMax(), means = getStatTable().getMean();
    // Overwrite every min/max entry with the non-infinite min/max of the column, or NaN when that cell is missing.
    // NOTE(review): this writes into the arrays returned by getMin()/getMax(); if those are the statistics
    // table's internal arrays (not visible here) the table itself is modified — confirm.
    for (int i = 0; i < maxes.length; i++) {
        DataCell min = getStatTable().getNonInfMin(i);
        if (min.isMissing()) {
            mins[i] = Double.NaN;
        } else {
            mins[i] = ((DoubleValue) min).getDoubleValue();
        }
        DataCell max = getStatTable().getNonInfMax(i);
        if (max.isMissing()) {
            maxes[i] = Double.NaN;
        } else {
            maxes[i] = ((DoubleValue) max).getDoubleValue();
        }
    }
    // Yields the final first output table together with the histogram models keyed by an Integer.
    Pair<BufferedDataTable, Map<Integer, ? extends HistogramModel<?>>> pair = histogramColumn.process(histogram, inData[0], hlHandler, ret[0], mins, maxes, means, numOfNominalValues(), getColumnNames());
    // final BufferedDataTable outTable =
    // histogramColumn.appendNominal(pair.getFirst(), getStatTable(), hlHandler, exec, numOfNominalValues());
    ret[0] = pair.getFirst();
    ret[1] = histogramColumn.nominalTable(getStatTable(), hlHandler, exec, numOfNominalValues());
    if (m_enableHiLite.getBooleanValue()) {
        // In this branch initPercent is .25, so rest evaluates to 1 - 0.5 - 0.5 = 0 and the two
        // sub contexts below are created with a share of 0.
        double rest = 1 - initPercent * 2 - 1.0 / 2;
        ExecutionContext projection = exec.createSubExecutionContext(rest / 2);
        ColumnRearranger rearranger = new ColumnRearranger(dataSpec);
        Set<String> colNames = new HashSet<String>(Arrays.asList(getColumnNames()));
        // Keep only columns that are (double or nominal compatible) and listed in getColumnNames().
        // NOTE(review): columns are removed from the rearranger while iterating over rearranger.createSpec();
        // presumably createSpec() returns an independent spec — confirm.
        for (DataColumnSpec spec : rearranger.createSpec()) {
            if ((!spec.getType().isCompatible(DoubleValue.class) && !spec.getType().isCompatible(NominalValue.class)) || !colNames.contains(spec.getName())) {
                rearranger.remove(spec.getName());
            }
        }
        ExecutionContext save = exec.createSubExecutionContext(rest / 2);
        // Projected copy of the input kept in a member field.
        // NOTE(review): the arguments 1 and getRowCount() presumably denote first row and row count — confirm.
        m_subTable = new DefaultDataArray(projection.createColumnRearrangeTable(inData[0], rearranger, projection), 1, inData[0].getRowCount(), save);
        // With hiliting the histograms are computed again here, using the input port's hilite handler,
        // rather than taken from the processing result above.
        m_histograms = histogramColumn.histograms(inData[0], getInHiLiteHandler(0), mins, maxes, means, getColumnNames());
        // Collect, in column order, the names of all columns for which nominal values were recorded.
        Set<String> nominalColumns = new LinkedHashSet<String>();
        for (int i = 0; i < inData[0].getSpec().getNumColumns(); ++i) {
            Map<DataCell, Integer> nominalValues = getStatTable().getNominalValues(i);
            if (nominalValues != null) {
                nominalColumns.add(inData[0].getSpec().getColumnSpec(i).getName());
            }
        }
        final Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> bucketsAndNominals = HistogramColumn.construct(m_histograms, m_subTable, nominalColumns);
        m_buckets = bucketsAndNominals.getFirst();
        m_nominalKeys = bucketsAndNominals.getSecond();
    } else {
        // Without hiliting the histogram models from the processing step are reused as they are.
        m_histograms = pair.getSecond();
    }
    return ret;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) HiLiteHandler(org.knime.core.node.property.hilite.HiLiteHandler) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) DataValue(org.knime.core.data.DataValue) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) HistogramColumn(org.knime.base.data.statistics.HistogramColumn) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) HistogramModel(org.knime.base.data.statistics.HistogramModel) SettingsModelInteger(org.knime.core.node.defaultnodesettings.SettingsModelInteger) ExecutionContext(org.knime.core.node.ExecutionContext) Statistics3Table(org.knime.base.data.statistics.Statistics3Table) DataCell(org.knime.core.data.DataCell) Map(java.util.Map) HashMap(java.util.HashMap)

Example 8 with Statistics3Table

use of org.knime.base.data.statistics.Statistics3Table in project knime-core by knime.

the class ExtendedStatisticsHTMLNodeView method renderNominalRow.

/**
 * Appends the HTML table row of one nominal column to the given buffer. The row consists of the escaped
 * column name, the formatted number of missing values and an {@code <object>} placeholder for the histogram.
 *
 * @param row index of the column within the statistics table's spec; also written as the histogram's colId
 * @param buffer the buffer the HTML is appended to
 * @param cssClass value of the {@code class} attribute of the generated {@code <tr>}
 */
private void renderNominalRow(final int row, final StringBuilder buffer, final String cssClass) {
    final Statistics3Table stats = getNodeModel().getStatTable();
    final String rowOpen = "<tr class=\"" + cssClass + "\">\n";
    final String nameCellOpen = "<td valign=\"" + ROW_VERTICAL_ALIGN + "\">";
    final String numericCellOpen = "<td class=\"numeric\" valign=\"" + ROW_VERTICAL_ALIGN + "\">";

    // Column name.
    buffer.append(rowOpen).append(nameCellOpen);
    buffer.append(escapeHtml(stats.getSpec().getColumnSpec(row).getName())).append("</td>");

    // Number of missing values.
    buffer.append(numericCellOpen);
    buffer.append(NumberFormat.getInstance().format((long) stats.getNumberMissingValues(row))).append("</td>\n");

    // Histogram placeholder.
    buffer.append("<td><object classid=\"org.knime.base.data.statistics.HistogramColumn.HistogramComponent\" colId=\"");
    buffer.append(row);
    buffer.append("\" width=\"");
    buffer.append(getNodeModel().getHistogramWidth());
    buffer.append("\" height=\"");
    buffer.append(getNodeModel().getHistogramHeight());
    buffer.append("\"></object></td>");
    buffer.append("</tr>\n");
}
Also used : Statistics3Table(org.knime.base.data.statistics.Statistics3Table)

Example 9 with Statistics3Table

use of org.knime.base.data.statistics.Statistics3Table in project knime-core by knime.

the class ExtendedStatisticsHTMLNodeView method renderRow.

/**
 * Appends the HTML table row of one column to the given buffer. The row consists of the escaped column name,
 * seven floating point statistics (min, mean, median, max, standard deviation, skewness, kurtosis; NaN is
 * shown as "?"), three counters (missing, positive infinite, negative infinite values) and an
 * {@code <object>} placeholder for the histogram.
 *
 * @param row index of the column within the statistics table's spec; also written as the histogram's colId
 * @param buffer the buffer the HTML is appended to
 * @param cssClass value of the {@code class} attribute of the generated {@code <tr>}
 */
private void renderRow(final int row, final StringBuilder buffer, final String cssClass) {
    final Statistics3Table stats = getNodeModel().getStatTable();
    final String numericCellOpen = "<td class=\"numeric\" valign=\"" + ROW_VERTICAL_ALIGN + "\">";

    // Column name.
    buffer.append("<tr class=\"" + cssClass + "\">\n");
    buffer.append("<td valign=\"" + ROW_VERTICAL_ALIGN + "\">");
    buffer.append(escapeHtml(stats.getSpec().getColumnSpec(row).getName())).append("</td>");

    // Floating point statistics, all fetched before the first cell is written.
    final double[] measures = { stats.getMin()[row], stats.getMean(row), stats.getMedian(row), stats.getMax()[row], stats.getStandardDeviation(row), stats.getSkewness(row), stats.getKurtosis(row) };
    for (int k = 0; k < measures.length; k++) {
        final String text;
        if (Double.isNaN(measures[k])) {
            text = "?";
        } else {
            text = DoubleFormat.formatDouble(measures[k]);
        }
        buffer.append(numericCellOpen).append(text).append("</td>\n");
    }

    // Integer counters.
    final NumberFormat counterFormat = NumberFormat.getInstance();
    final int[] counters = { stats.getNumberMissingValues()[row], stats.getNumberPositiveInfiniteValues(row), stats.getNumberNegativeInfiniteValues(row) };
    for (int k = 0; k < counters.length; k++) {
        buffer.append(numericCellOpen).append(counterFormat.format(counters[k])).append("</td>\n");
    }

    // Histogram placeholder.
    buffer.append("<td><object classid=\"org.knime.base.data.statistics.HistogramColumn.HistogramComponent\" colId=\"");
    buffer.append(row);
    buffer.append("\" width=\"");
    buffer.append(getNodeModel().getHistogramWidth());
    buffer.append("\" height=\"");
    buffer.append(getNodeModel().getHistogramHeight());
    buffer.append("\"></object></td>");
    buffer.append("</tr>\n");
}
Also used : Statistics3Table(org.knime.base.data.statistics.Statistics3Table) NumberFormat(java.text.NumberFormat)

Example 10 with Statistics3Table

use of org.knime.base.data.statistics.Statistics3Table in project knime-core by knime.

the class ExtendedStatisticsHTMLNodeView method renderNominal.

/**
 * Renders the HTML page with one table row per nominal column (column name, number of missing values,
 * histogram placeholder). When no model is available the page contains "No data available." instead of
 * the table; when the model has no statistics table yet, only the table header is rendered.
 *
 * @param model An {@link ExtendedStatisticsNodeModel}; a {@code null} value is treated as "no data".
 * @return A {@link StringBuilder} with all the stats added for nominal HTML table.
 */
private StringBuilder renderNominal(final ExtendedStatisticsNodeModel model) {
    final StringBuilder ret = createHtmlHeader();
    ret.append("<body>\n");
    // Both references are needed: this method reads from the passed model, while the row renderer
    // reads from getNodeModel(). Guarding only one of them leaves a NullPointerException open.
    if (model != null && getNodeModel() != null) {
        ret.append("<table>\n");
        ret.append("<tr>");
        for (String header : Arrays.asList("Column", "No. missings", "Histogram")) {
            ret.append("<th class=\"left\">");
            ret.append(header);
            ret.append("</th>");
        }
        ret.append("</tr>");
        // Fetch the statistics table once and use this snapshot throughout, so the null check below
        // actually covers every access made in the loop.
        final Statistics3Table statTable = model.getStatTable();
        if (statTable != null) {
            // renderedRows drives the even/odd striping, colIdx walks all columns of the spec.
            int renderedRows = 0;
            int colIdx = 0;
            for (DataColumnSpec spec : statTable.getSpec()) {
                // Only columns for which nominal values were collected get a row.
                if (statTable.getNominalValues(colIdx) != null) {
                    final String cssClass = renderedRows % 2 == 0 ? "even" : "odd";
                    final int columnIndex = statTable.getSpec().findColumnIndex(spec.getName());
                    renderNominalRow(columnIndex, ret, cssClass);
                    renderedRows++;
                }
                ++colIdx;
            }
        }
        ret.append("</table>\n");
    } else {
        ret.append("No data available.\n");
    }
    ret.append("</body>\n");
    ret.append("</html>\n");
    return ret;
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) Statistics3Table(org.knime.base.data.statistics.Statistics3Table)

Aggregations

Statistics3Table (org.knime.base.data.statistics.Statistics3Table)11 DataColumnSpec (org.knime.core.data.DataColumnSpec)6 DataTableSpec (org.knime.core.data.DataTableSpec)4 BufferedDataTable (org.knime.core.node.BufferedDataTable)4 DataCell (org.knime.core.data.DataCell)3 ExecutionContext (org.knime.core.node.ExecutionContext)3 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 LinkedHashSet (java.util.LinkedHashSet)2 Map (java.util.Map)2 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)2 NumberFormat (java.text.NumberFormat)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Set (java.util.Set)1 HistogramColumn (org.knime.base.data.statistics.HistogramColumn)1 HistogramModel (org.knime.base.data.statistics.HistogramModel)1 DefaultDataArray (org.knime.base.node.util.DefaultDataArray)1 DataColumnDomain (org.knime.core.data.DataColumnDomain)1 DataValue (org.knime.core.data.DataValue)1