use of org.knime.base.data.statistics.StatisticsTable in project knime-core by knime.
the class Normalizer method doDecimalScaling.
/**
 * Does the decimal scaling: each selected column gets the scale factor
 * <code>1 / 10^k</code>, where <code>k</code> is the smallest non-negative
 * integer for which the column's largest absolute value, divided by
 * <code>10^k</code>, is not greater than 1. The offset of every column is 0.
 *
 * <p>
 * A column whose minimum or maximum is infinite cannot be brought into range
 * by any finite power of ten; such a column keeps the scale factor 1.0.
 *
 * @param exec an object to check for user cancelations. Can be
 *            <code>null</code>.
 * @throws CanceledExecutionException if user canceled
 * @return the normalized DataTable
 */
public AffineTransTable doDecimalScaling(final ExecutionMonitor exec) throws CanceledExecutionException {
    // Reuse the statistics if the input already carries them, otherwise compute them now.
    StatisticsTable st;
    if (m_table instanceof StatisticsTable) {
        st = (StatisticsTable) m_table;
    } else {
        st = new StatisticsTable(m_table, exec);
    }
    checkForMissVals(st);
    String[] includes = getNames();
    double[] max = st.getdoubleMax();
    double[] min = st.getdoubleMin();
    final int colCount = m_colindices.length;
    double[] scales = new double[colCount];
    double[] transforms = new double[colCount];
    double[] mins = new double[colCount];
    double[] maxs = new double[colCount];
    for (int i = 0; i < colCount; i++) {
        // m_colindices maps the position in the result arrays to the index used by the statistics arrays.
        int trueIndex = m_colindices[i];
        double absMax = Math.abs(max[trueIndex]);
        double absMin = Math.abs(min[trueIndex]);
        double maxvalue = absMax > absMin ? absMax : absMin;
        int exp = 0;
        // Infinity / 10 is still Infinity, so dividing repeatedly would never
        // terminate for an infinite magnitude; leave the exponent at 0 then.
        // (maxvalue is never negative here, and a NaN fails the comparison.)
        if (!Double.isInfinite(maxvalue)) {
            while (maxvalue > 1) {
                maxvalue = maxvalue / 10;
                exp++;
            }
        }
        scales[i] = 1.0 / Math.pow(10, exp);
        transforms[i] = 0.0;
        // Handed to the configuration as the per-column min/max
        // (presumably the bounds of the normalized range - confirm against AffineTransConfiguration).
        mins[i] = -1.0;
        maxs[i] = 1.0;
    }
    String summary = "Decimal Scaling normalization on " + includes.length + " column(s)";
    AffineTransConfiguration configuration = new AffineTransConfiguration(includes, scales, transforms, mins, maxs, summary);
    return new AffineTransTable(m_table, configuration);
}
use of org.knime.base.data.statistics.StatisticsTable in project knime-core by knime.
the class MissingValueHandlingTable method createMissingValueHandlingTable.
// getColSetting(DataTableSpec, ColSetting[])
/**
 * Applies the missing value handling described by the column settings to the
 * argument table, copies the result into a buffered table and reports
 * progress while doing so.
 *
 * @param table the table to do missing value handling on
 * @param colSettings the settings
 * @param exec for progress/cancel and to create the buffered data table
 * @param warningBuffer To which potential warning messages are added.
 * @return a cache table, cleaned up
 * @throws CanceledExecutionException if canceled
 */
public static BufferedDataTable createMissingValueHandlingTable(final DataTable table, final ColSetting[] colSettings, final ExecutionContext exec, final StringBuffer warningBuffer) throws CanceledExecutionException {
    ColSetting[] resolved;
    try {
        resolved = getColSetting(table.getDataTableSpec(), colSettings, false);
    } catch (InvalidSettingsException ise) {
        LOGGER.coding("getColSetting method is not supposed to throw " + "an exception, ignoring settings", ise);
        // Fall back to one "no handling" setting per column of the input spec.
        DataTableSpec spec = table.getDataTableSpec();
        final int columnCount = spec.getNumColumns();
        resolved = new ColSetting[columnCount];
        for (int col = 0; col < columnCount; col++) {
            ColSetting fallback = new ColSetting(spec.getColumnSpec(col));
            fallback.setMethod(ColSetting.METHOD_NO_HANDLING);
            resolved[col] = fallback;
        }
    }

    // Classify the columns: which ones ask for the most frequent value, and
    // does any column need a statistics table at all?
    boolean statisticsRequired = false;
    int[] mostFrequentCandidates = new int[resolved.length];
    int mostFrequentCount = 0;
    for (int col = 0; col < resolved.length; col++) {
        switch (resolved[col].getMethod()) {
            case ColSetting.METHOD_MOST_FREQUENT:
                mostFrequentCandidates[mostFrequentCount++] = col;
                // intentional fall-through: this method needs the statistics table as well
            case ColSetting.METHOD_MAX:
            case ColSetting.METHOD_MIN:
            case ColSetting.METHOD_MEAN:
                statisticsRequired = true;
                break;
            default:
                break;
        }
    }
    int[] mostFrequentCols = new int[mostFrequentCount];
    System.arraycopy(mostFrequentCandidates, 0, mostFrequentCols, 0, mostFrequentCount);

    DataTable source;
    ExecutionMonitor rowProgress;
    if (!statisticsRequired || table instanceof StatisticsTable) {
        // Nothing to precompute: iterate the input directly with the full progress.
        source = table;
        rowProgress = exec;
    } else {
        // First half of the progress: creating the statistics table.
        ExecutionMonitor statisticsProgress = exec.createSubProgress(0.5);
        MyStatisticsTable statistics = new MyStatisticsTable(table, statisticsProgress, mostFrequentCols);
        if (statistics.m_warningMessage != null) {
            warningBuffer.append(statistics.m_warningMessage);
        }
        source = statistics;
        // Second half of the progress: the iterator.
        rowProgress = exec.createSubProgress(0.5);
    }

    MissingValueHandlingTable handled = new MissingValueHandlingTable(source, resolved);
    BufferedDataContainer container = exec.createDataContainer(handled.getDataTableSpec());
    rowProgress.setMessage("Adding rows...");
    try {
        MissingValueHandlingTableIterator rows = new MissingValueHandlingTableIterator(handled, rowProgress);
        for (int rowNumber = 1; rows.hasNext(); rowNumber++) {
            DataRow row = rows.next();
            rowProgress.setMessage("Adding row " + rowNumber + " (\"" + row.getKey() + "\")");
            container.addRowToTable(row);
        }
    } catch (MissingValueHandlingTableIterator.RuntimeCanceledExecutionException rcee) {
        // The iterator reports cancelation through a runtime wrapper; rethrow its cause.
        throw rcee.getCause();
    } finally {
        // The container is closed on every path, including cancelation.
        container.close();
    }
    return container.getTable();
}
use of org.knime.base.data.statistics.StatisticsTable in project knime-core by knime.
the class LowVarFilterNodeModel method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    StatisticsTable statistics = new StatisticsTable(inData[0], exec);
    ArrayList<String> keptColumns = new ArrayList<String>();
    DataTableSpec spec = inData[0].getDataTableSpec();
    final int columnCount = spec.getNumColumns();
    final double limit = m_varianceThreshold;
    HashSet<String> candidates = new HashSet<String>(Arrays.asList(m_includedColumns));

    for (int col = 0; col < columnCount; col++) {
        DataColumnSpec column = spec.getColumnSpec(col);
        String name = column.getName();
        // Only included, double-compatible columns are subject to the variance test.
        boolean eligible = candidates.contains(name) && column.getType().isCompatible(DoubleValue.class);
        if (eligible && !(statistics.getVariance(col) > limit)) {
            // Variance does not exceed the threshold: leave this column out.
            continue;
        }
        keptColumns.add(name);
    }

    int filteredOutCount = columnCount - keptColumns.size();
    LOGGER.info("Filtered out " + filteredOutCount + " column(s)");
    if (filteredOutCount == 0) {
        setWarningMessage("No columns were filtered out.");
    }

    String[] keptNames = keptColumns.toArray(new String[keptColumns.size()]);
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    rearranger.keepOnly(keptNames);
    BufferedDataTable result = exec.createColumnRearrangeTable(inData[0], rearranger, exec);
    return new BufferedDataTable[] { result };
}
use of org.knime.base.data.statistics.StatisticsTable in project knime-core by knime.
the class StatisticsNodeModel method execute.
/**
 * Computes the statistics for the DataTable at the inport and stores them,
 * together with the column names, in the fields of this model. Use the view
 * on this node to see them. No output table is produced.
 *
 * @see org.knime.core.node.NodeModel
 *      #execute(BufferedDataTable[],ExecutionContext)
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    StatisticsTable statistics = new StatisticsTable(input, exec);

    // Keep the per-column statistics in the model's fields.
    m_min = statistics.getdoubleMin();
    m_max = statistics.getdoubleMax();
    m_mean = statistics.getMean();
    m_stddev = statistics.getStandardDeviation();
    m_variance = statistics.getVariance();

    // Record the column names in the iteration order of the input spec.
    DataTableSpec spec = input.getDataTableSpec();
    m_columnNames = new String[spec.getNumColumns()];
    int next = 0;
    for (DataColumnSpec column : spec) {
        m_columnNames[next++] = column.getName();
    }
    return new BufferedDataTable[0];
}
use of org.knime.base.data.statistics.StatisticsTable in project knime-core by knime.
the class MissingValueHandling2Table method createMissingValueHandlingTable.
// getColSetting(DataTableSpec, ColSetting[])
/**
 * Applies the missing value handling described by the column settings to the
 * argument table, copies the result into a buffered table and reports
 * progress while doing so.
 *
 * @param table the table to do missing value handling on
 * @param colSettings the settings
 * @param exec for progress/cancel and to create the buffered data table
 * @param warningBuffer To which potential warning messages are added.
 * @return a cache table, cleaned up
 * @throws CanceledExecutionException if canceled
 */
public static BufferedDataTable createMissingValueHandlingTable(final DataTable table, final MissingValueHandling2ColSetting[] colSettings, final ExecutionContext exec, final StringBuffer warningBuffer) throws CanceledExecutionException {
    MissingValueHandling2ColSetting[] resolved;
    try {
        resolved = getColSetting(table.getDataTableSpec(), colSettings, false);
    } catch (InvalidSettingsException ise) {
        LOGGER.coding("getColSetting method is not supposed to throw " + "an exception, ignoring settings", ise);
        // Fall back to one "no handling" setting per column of the input spec.
        DataTableSpec spec = table.getDataTableSpec();
        final int columnCount = spec.getNumColumns();
        resolved = new MissingValueHandling2ColSetting[columnCount];
        for (int col = 0; col < columnCount; col++) {
            MissingValueHandling2ColSetting fallback = new MissingValueHandling2ColSetting(spec.getColumnSpec(col));
            fallback.setMethod(MissingValueHandling2ColSetting.METHOD_NO_HANDLING);
            resolved[col] = fallback;
        }
    }

    // Classify the columns: which ones ask for the most frequent value, and
    // does any column need a statistics table at all?
    boolean statisticsRequired = false;
    int[] mostFrequentCandidates = new int[resolved.length];
    int mostFrequentCount = 0;
    for (int col = 0; col < resolved.length; col++) {
        switch (resolved[col].getMethod()) {
            case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
                mostFrequentCandidates[mostFrequentCount++] = col;
                // intentional fall-through: this method needs the statistics table as well
            case MissingValueHandling2ColSetting.METHOD_MAX:
            case MissingValueHandling2ColSetting.METHOD_MIN:
            case MissingValueHandling2ColSetting.METHOD_MEAN:
                statisticsRequired = true;
                break;
            default:
                break;
        }
    }
    int[] mostFrequentCols = new int[mostFrequentCount];
    System.arraycopy(mostFrequentCandidates, 0, mostFrequentCols, 0, mostFrequentCount);

    DataTable source;
    ExecutionMonitor rowProgress;
    if (!statisticsRequired || table instanceof StatisticsTable) {
        // Nothing to precompute: iterate the input directly with the full progress.
        source = table;
        rowProgress = exec;
    } else {
        // First half of the progress: creating the statistics table.
        ExecutionMonitor statisticsProgress = exec.createSubProgress(0.5);
        MyStatisticsTable statistics = new MyStatisticsTable(table, statisticsProgress, mostFrequentCols);
        if (statistics.m_warningMessage != null) {
            warningBuffer.append(statistics.m_warningMessage);
        }
        source = statistics;
        // Second half of the progress: the iterator.
        rowProgress = exec.createSubProgress(0.5);
    }

    MissingValueHandling2Table handled = new MissingValueHandling2Table(source, resolved);
    BufferedDataContainer container = exec.createDataContainer(handled.getDataTableSpec());
    rowProgress.setMessage("Adding rows...");
    try {
        MissingValueHandling2TableIterator rows = new MissingValueHandling2TableIterator(handled, rowProgress);
        for (int rowNumber = 1; rows.hasNext(); rowNumber++) {
            DataRow row = rows.next();
            rowProgress.setMessage("Adding row " + rowNumber + " (\"" + row.getKey() + "\")");
            container.addRowToTable(row);
        }
    } catch (MissingValueHandling2TableIterator.RuntimeCanceledExecutionException rcee) {
        // The iterator reports cancelation through a runtime wrapper; rethrow its cause.
        throw rcee.getCause();
    } finally {
        // The container is closed on every path, including cancelation.
        container.close();
    }
    return container.getTable();
}
Aggregations