use of org.knime.base.data.statistics.Statistics3Table in project knime-core by knime.
the class LowVarFilter2NodeModel method execute.
/**
 * Removes low-variance columns from the first input table. A column is kept
 * when it is not part of the column filter selection, when it is not
 * double-compatible, or when its variance is strictly greater than the
 * configured threshold; every other column is dropped.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws CanceledExecutionException {
    if (m_conf == null) {
        // auto-guess
        m_conf = createColFilterConf();
    }
    final BufferedDataTable input = inData[0];
    final FilterResult filterResult = m_conf.applyTo(input.getDataTableSpec());
    final String[] selected = filterResult.getIncludes();
    final Statistics3Table stats = new Statistics3Table(input, false, 0, Collections.<String>emptyList(), exec);
    final DataTableSpec inSpec = input.getDataTableSpec();
    final int total = inSpec.getNumColumns();
    final double threshold = m_varianceThreshold;
    final HashSet<String> candidates = new HashSet<String>(Arrays.asList(selected));
    final ArrayList<String> kept = new ArrayList<String>();
    for (int col = 0; col < total; col++) {
        final DataColumnSpec colSpec = inSpec.getColumnSpec(col);
        final String name = colSpec.getName();
        // only selected, double-compatible columns are subject to the variance test
        final boolean candidate = candidates.contains(name) && colSpec.getType().isCompatible(DoubleValue.class);
        if (!candidate || stats.getVariance(col) > threshold) {
            kept.add(name);
        }
    }
    final int filteredOutCount = inSpec.getNumColumns() - kept.size();
    LOGGER.info("Filtered out " + filteredOutCount + " column(s)");
    if (filteredOutCount == 0) {
        setWarningMessage("No columns were filtered out.");
    }
    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    final String[] keptNames = kept.toArray(new String[kept.size()]);
    rearranger.keepOnly(keptNames);
    final BufferedDataTable result = exec.createColumnRearrangeTable(input, rearranger, exec);
    return new BufferedDataTable[] { result };
}
use of org.knime.base.data.statistics.Statistics3Table in project knime-core by knime.
the class ExtendedStatisticsNodeModel method execute.
/**
* Computes the statistics of the first input table and returns three tables:
* index 0 holds the table returned by {@code HistogramColumn#process} (built on top
* of the statistics-in-columns table), index 1 the table returned by
* {@code HistogramColumn#nominalTable}, and index 2 the nominal value table with
* its spec replaced by the result of {@code renamedOccurrencesSpec}.
* <p>
* Side effects: assigns {@code m_statTable} and {@code m_histograms}; when HiLite is
* enabled it additionally assigns {@code m_subTable}, {@code m_buckets} and
* {@code m_nominalKeys}. A warning reported by the statistics table is forwarded
* via {@code setWarningMessage}.
*
* {@inheritDoc}
*
* @throws CanceledExecutionException propagated from the statistics and table
*             computations (presumably when the user cancels the execution)
*/
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws CanceledExecutionException {
// Progress share used for the statistics pass and again for the statistics table below.
// NOTE(review): the fractions handed out in this method (initPercent, 0.5, initPercent, 0.5)
// add up to more than 1 - confirm this is intended.
double initPercent = m_enableHiLite.getBooleanValue() ? .25 : .2;
ExecutionContext init = exec.createSubExecutionContext(initPercent);
DataTableSpec dataSpec = inData[0].getDataTableSpec();
List<String> includes = nominalColumns(dataSpec);
m_statTable = new Statistics3Table(inData[0], m_computeMedian.getBooleanValue(), numOfNominalValuesOutput(), includes, init);
// getStatTable() presumably returns the m_statTable assigned above.
if (getStatTable().getWarning() != null) {
setWarningMessage(getStatTable().getWarning());
}
// Third output: nominal value table for the nominal columns, with a replaced spec.
BufferedDataTable outTableOccurrences = exec.createBufferedDataTable(getStatTable().createNominalValueTable(includes), exec.createSubProgress(0.5));
BufferedDataTable[] ret = new BufferedDataTable[3];
DataTableSpec newSpec = renamedOccurrencesSpec(outTableOccurrences.getSpec());
ret[2] = exec.createSpecReplacerTable(outTableOccurrences, newSpec);
// First output (preliminary): replaced further down by the table returned from process(...).
ExecutionContext table = exec.createSubExecutionContext(initPercent);
ret[0] = getStatTable().createStatisticsInColumnsTable(table);
ExecutionContext histogram = exec.createSubExecutionContext(1.0 / 2);
final HistogramColumn histogramColumn = createHistogramColumn();
// Without HiLite a fresh handler is used instead of the handler of input port 0.
HiLiteHandler hlHandler = getEnableHiLite().getBooleanValue() ? getInHiLiteHandler(0) : new HiLiteHandler();
double[] mins = getStatTable().getMin(), maxes = getStatTable().getMax(), means = getStatTable().getMean();
// Overwrite every min/max entry with the non-infinite min/max of that column,
// using NaN where the corresponding cell is missing.
// NOTE(review): this writes into the arrays returned by getMin()/getMax(); if those are
// the statistics table's internal arrays its state is changed as well - confirm.
for (int i = 0; i < maxes.length; i++) {
DataCell min = getStatTable().getNonInfMin(i);
if (min.isMissing()) {
mins[i] = Double.NaN;
} else {
mins[i] = ((DoubleValue) min).getDoubleValue();
}
DataCell max = getStatTable().getNonInfMax(i);
if (max.isMissing()) {
maxes[i] = Double.NaN;
} else {
maxes[i] = ((DoubleValue) max).getDoubleValue();
}
}
Pair<BufferedDataTable, Map<Integer, ? extends HistogramModel<?>>> pair = histogramColumn.process(histogram, inData[0], hlHandler, ret[0], mins, maxes, means, numOfNominalValues(), getColumnNames());
// final BufferedDataTable outTable =
// histogramColumn.appendNominal(pair.getFirst(), getStatTable(), hlHandler, exec, numOfNominalValues());
ret[0] = pair.getFirst();
// Second output: table produced by HistogramColumn#nominalTable.
ret[1] = histogramColumn.nominalTable(getStatTable(), hlHandler, exec, numOfNominalValues());
if (m_enableHiLite.getBooleanValue()) {
// With HiLite enabled initPercent is .25, so rest evaluates to 1 - .5 - .5 = 0 and the
// two sub contexts below are each created with a fraction of 0.
double rest = 1 - initPercent * 2 - 1.0 / 2;
ExecutionContext projection = exec.createSubExecutionContext(rest / 2);
ColumnRearranger rearranger = new ColumnRearranger(dataSpec);
Set<String> colNames = new HashSet<String>(Arrays.asList(getColumnNames()));
// Keep only columns that are double- or nominal-compatible and listed in getColumnNames().
for (DataColumnSpec spec : rearranger.createSpec()) {
if ((!spec.getType().isCompatible(DoubleValue.class) && !spec.getType().isCompatible(NominalValue.class)) || !colNames.contains(spec.getName())) {
rearranger.remove(spec.getName());
}
}
ExecutionContext save = exec.createSubExecutionContext(rest / 2);
// NOTE(review): the arguments 1 and getRowCount() presumably denote the first and last
// row to store - confirm against DefaultDataArray.
m_subTable = new DefaultDataArray(projection.createColumnRearrangeTable(inData[0], rearranger, projection), 1, inData[0].getRowCount(), save);
// Histograms are recomputed here rather than taken from pair.getSecond().
m_histograms = histogramColumn.histograms(inData[0], getInHiLiteHandler(0), mins, maxes, means, getColumnNames());
// Collect, in input column order, the names of all columns for which the
// statistics table reports nominal values.
Set<String> nominalColumns = new LinkedHashSet<String>();
for (int i = 0; i < inData[0].getSpec().getNumColumns(); ++i) {
Map<DataCell, Integer> nominalValues = getStatTable().getNominalValues(i);
if (nominalValues != null) {
nominalColumns.add(inData[0].getSpec().getColumnSpec(i).getName());
}
}
final Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> bucketsAndNominals = HistogramColumn.construct(m_histograms, m_subTable, nominalColumns);
m_buckets = bucketsAndNominals.getFirst();
m_nominalKeys = bucketsAndNominals.getSecond();
} else {
// Without HiLite the histogram models produced by process(...) are kept as-is.
m_histograms = pair.getSecond();
}
return ret;
}
use of org.knime.base.data.statistics.Statistics3Table in project knime-core by knime.
the class ExtendedStatisticsHTMLNodeView method renderNominalRow.
/**
 * Appends the HTML table row of one nominal column to the buffer: the escaped
 * column name, the formatted number of missing values and an {@code <object>}
 * placeholder for the histogram component.
 *
 * @param row index of the column within the statistics table's spec
 * @param buffer builder the generated HTML is appended to
 * @param cssClass value written into the row's {@code class} attribute
 */
private void renderNominalRow(final int row, final StringBuilder buffer, final String cssClass) {
    final Statistics3Table stats = getNodeModel().getStatTable();
    // shared tail of both cell opening tags
    final String valign = "valign=\"" + ROW_VERTICAL_ALIGN + "\">";

    buffer.append("<tr class=\"").append(cssClass).append("\">\n");

    // column name cell
    buffer.append("<td ").append(valign);
    final String columnName = stats.getSpec().getColumnSpec(row).getName();
    buffer.append(escapeHtml(columnName)).append("</td>");

    // missing value count cell
    buffer.append("<td class=\"numeric\" ").append(valign);
    final NumberFormat countFormat = NumberFormat.getInstance();
    buffer.append(countFormat.format((long) stats.getNumberMissingValues(row)));
    buffer.append("</td>\n");

    // histogram placeholder cell
    buffer.append("<td><object classid=\"org.knime.base.data.statistics.HistogramColumn.HistogramComponent\" colId=\"");
    buffer.append(row);
    buffer.append("\" width=\"");
    buffer.append(getNodeModel().getHistogramWidth());
    buffer.append("\" height=\"");
    buffer.append(getNodeModel().getHistogramHeight());
    buffer.append("\"></object></td>");

    buffer.append("</tr>\n");
}
use of org.knime.base.data.statistics.Statistics3Table in project knime-core by knime.
the class ExtendedStatisticsHTMLNodeView method renderRow.
/**
 * Appends the HTML table row of one numeric column to the buffer. The row holds
 * the escaped column name, then min, mean, median, max, standard deviation,
 * skewness and kurtosis ({@code ?} for NaN), then the counts of missing,
 * positive infinite and negative infinite values, and finally an
 * {@code <object>} placeholder for the histogram component.
 *
 * @param row index of the column within the statistics table's spec
 * @param buffer builder the generated HTML is appended to
 * @param cssClass value written into the row's {@code class} attribute
 */
private void renderRow(final int row, final StringBuilder buffer, final String cssClass) {
    final Statistics3Table stats = getNodeModel().getStatTable();
    final String numericCellOpen = "<td class=\"numeric\" valign=\"" + ROW_VERTICAL_ALIGN + "\">";

    buffer.append("<tr class=\"").append(cssClass).append("\">\n");

    // column name cell
    buffer.append("<td valign=\"" + ROW_VERTICAL_ALIGN + "\">");
    final String columnName = stats.getSpec().getColumnSpec(row).getName();
    buffer.append(escapeHtml(columnName)).append("</td>");

    // floating point statistics, all fetched before the first cell is written
    final double[] moments = {
        stats.getMin()[row],
        stats.getMean(row),
        stats.getMedian(row),
        stats.getMax()[row],
        stats.getStandardDeviation(row),
        stats.getSkewness(row),
        stats.getKurtosis(row)
    };
    for (int k = 0; k < moments.length; k++) {
        final double value = moments[k];
        final String text;
        if (Double.isNaN(value)) {
            text = "?";
        } else {
            text = DoubleFormat.formatDouble(value);
        }
        buffer.append(numericCellOpen).append(text).append("</td>\n");
    }

    // integer counts, likewise fetched up front
    final NumberFormat countFormat = NumberFormat.getInstance();
    final int[] counts = {
        stats.getNumberMissingValues()[row],
        stats.getNumberPositiveInfiniteValues(row),
        stats.getNumberNegativeInfiniteValues(row)
    };
    for (int k = 0; k < counts.length; k++) {
        buffer.append(numericCellOpen).append(countFormat.format(counts[k])).append("</td>\n");
    }

    // histogram placeholder cell
    buffer.append("<td><object classid=\"org.knime.base.data.statistics.HistogramColumn.HistogramComponent\" colId=\"");
    buffer.append(row);
    buffer.append("\" width=\"");
    buffer.append(getNodeModel().getHistogramWidth());
    buffer.append("\" height=\"");
    buffer.append(getNodeModel().getHistogramHeight());
    buffer.append("\"></object></td>");

    buffer.append("</tr>\n");
}
use of org.knime.base.data.statistics.Statistics3Table in project knime-core by knime.
the class ExtendedStatisticsHTMLNodeView method renderNominal.
/**
 * Renders the HTML page for the nominal columns: a table with one row per
 * column for which the statistics table reports nominal values, showing the
 * column name, the number of missing values and a histogram placeholder.
 * When no model is available the body only contains "No data available.".
 *
 * @param model An {@link ExtendedStatisticsNodeModel}; may be {@code null}.
 * @return A {@link StringBuilder} with all the stats added for nominal HTML table.
 */
private StringBuilder renderNominal(final ExtendedStatisticsNodeModel model) {
    final StringBuilder ret = createHtmlHeader();
    ret.append("<body>\n");
    // The table is read from the model parameter while renderNominalRow reads from
    // getNodeModel(), so both must be present before anything is dereferenced.
    if (model != null && getNodeModel() != null) {
        ret.append("<table>\n");
        ret.append("<tr>");
        for (String title : new String[] {"Column", "No. missings", "Histogram"}) {
            ret.append("<th class=\"left\">").append(title).append("</th>");
        }
        ret.append("</tr>");
        // Fetch the statistics once and work on that snapshot, so the null check
        // below covers every later access.
        final Statistics3Table statTable = model.getStatTable();
        if (statTable != null) {
            int rendered = 0;
            int colIdx = 0;
            for (DataColumnSpec spec : statTable.getSpec()) {
                if (statTable.getNominalValues(colIdx) != null) {
                    // alternate the row style over the rendered rows only
                    final String cssClass = rendered % 2 == 0 ? "even" : "odd";
                    // NOTE(review): presumably identical to colIdx - confirm before simplifying
                    final int columnIndex = statTable.getSpec().findColumnIndex(spec.getName());
                    renderNominalRow(columnIndex, ret, cssClass);
                    rendered++;
                }
                ++colIdx;
            }
        }
        ret.append("</table>\n");
    } else {
        ret.append("No data available.\n");
    }
    ret.append("</body>\n");
    ret.append("</html>\n");
    return ret;
}
Aggregations