use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class CAIMDiscretizationNodeModel method createResultTable.
/**
 * Builds the discretized counterpart of an input table. Every column that
 * the given {@link DiscretizationModel} lists as included is turned into a
 * nominal {@link StringCell} column; all other columns are copied over
 * unchanged. Missing cells in included columns are passed through as they
 * are.
 *
 * @param exec the context used to create the result container and to
 *            report progress (updated every 200 rows)
 * @param table the input data table
 * @param discretizationModel the {@link DiscretizationModel} providing the
 *            included column names and one {@link DiscretizationScheme}
 *            per included column
 * @return the input data with the included columns replaced by their
 *         discrete values
 */
public static BufferedDataTable createResultTable(final ExecutionContext exec, final BufferedDataTable table, final DiscretizationModel discretizationModel) {
    final DiscretizationScheme[] allSchemes = discretizationModel.getSchemes();
    final String[] includedColumnNames = discretizationModel.getIncludedColumnNames();
    // drop the schemes whose column does not exist in the given table
    final DiscretizationScheme[] schemes = filterNotKnownSchemes(allSchemes, includedColumnNames, table.getDataTableSpec());

    final DataTableSpec inSpec = table.getDataTableSpec();
    final int columnCount = inSpec.getNumColumns();
    final DataColumnSpec[] outColumns = new DataColumnSpec[columnCount];
    // discretized[c] tells whether column c gets replaced by a nominal column
    final boolean[] discretized = new boolean[columnCount];
    int col = 0;
    for (DataColumnSpec inColumn : inSpec) {
        final boolean hit = isIncluded(inColumn, includedColumnNames) >= 0;
        discretized[col] = hit;
        // included columns become nominal string columns, others stay as is
        outColumns[col] = hit ? new DataColumnSpecCreator(inColumn.getName(), StringCell.TYPE).createSpec() : inColumn;
        col++;
    }

    final BufferedDataContainer container = exec.createDataContainer(new DataTableSpec(outColumns));
    final double totalRows = table.size();
    long rowsDone = 0;
    for (DataRow row : table) {
        if (rowsDone % 200 == 0) {
            exec.setProgress(rowsDone / totalRows);
        }
        final DataCell[] outCells = new DataCell[row.getNumCells()];
        int cellIdx = 0;
        // NOTE(review): schemes are consumed in table column order; this
        // assumes filterNotKnownSchemes returns them in that order - confirm
        int schemeIdx = 0;
        for (DataCell cell : row) {
            if (!discretized[cellIdx]) {
                outCells[cellIdx++] = cell;
                continue;
            }
            if (cell.isMissing()) {
                // missing values cannot be discretized, keep them
                outCells[cellIdx] = cell;
            } else {
                final double numeric = ((DoubleValue) cell).getDoubleValue();
                outCells[cellIdx] = new StringCell(schemes[schemeIdx].getDiscreteValue(numeric));
            }
            schemeIdx++;
            cellIdx++;
        }
        container.addRowToTable(new DefaultRow(row.getKey(), outCells));
        rowsDone++;
    }
    container.close();
    return container.getTable();
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class ConditionalBoxPlotNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * Groups the values of the configured numeric column by the value of the
 * configured nominal column, computes the box plot statistics and outliers
 * per group and stores them in the model's fields for the view. Rows with
 * a missing numeric value are always skipped; rows with a missing nominal
 * value are skipped unless the settings ask to show missing values.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    m_statistics = new LinkedHashMap<DataColumnSpec, double[]>();
    m_mildOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    m_extremeOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    final BufferedDataTable input = inData[0];
    final DataTableSpec inSpec = input.getDataTableSpec();
    double nrRows = input.size();
    int rowCount = 0;
    int numericIndex = inSpec.findColumnIndex(m_settings.numericColumn());
    int nominalIndex = inSpec.findColumnIndex(m_settings.nominalColumn());
    // nominal value -> (numeric value -> keys of the rows having that value)
    Map<String, Map<Double, Set<RowKey>>> data = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    // some default values .. if one column only has missing values.
    for (DataCell d : inSpec.getColumnSpec(nominalIndex).getDomain().getValues()) {
        String name = ((StringValue) d).getStringValue();
        m_mildOutliers.put(name, new HashMap<Double, Set<RowKey>>());
        m_extremeOutliers.put(name, new HashMap<Double, Set<RowKey>>());
    }
    for (DataRow r : input) {
        exec.checkCanceled();
        exec.setProgress(rowCount++ / nrRows, "Separating...");
        if (!m_settings.showMissingValues()) {
            if (r.getCell(nominalIndex).isMissing()) {
                // missing cell in nominal values is unwanted?
                continue;
            }
        }
        String nominal = replaceSpaces(r.getCell(nominalIndex).toString());
        if (r.getCell(numericIndex).isMissing()) {
            // ignore missing cells in numeric column
            continue;
        }
        DoubleValue numeric = (DoubleValue) r.getCell(numericIndex);
        Map<Double, Set<RowKey>> map = data.get(nominal);
        if (map == null) {
            map = new LinkedHashMap<Double, Set<RowKey>>();
        }
        Set<RowKey> set = map.get(numeric.getDoubleValue());
        if (set == null) {
            set = new HashSet<RowKey>();
        }
        set.add(r.getKey());
        map.put(numeric.getDoubleValue(), set);
        data.put(nominal, map);
    }
    List<String> keys = new ArrayList<String>(data.keySet());
    boolean ignoreMissingValues = false;
    if (m_settings.showMissingValues() && !keys.contains(DataType.getMissingCell().toString())) {
        // we promised to create data for missing values..
        // if there aren't any.. we have to create them ourselves
        setWarningMessage("No missing values found.");
        ignoreMissingValues = true;
    }
    Collections.sort(keys);
    DataColumnSpec[] colSpecs = createColumnSpec(inSpec.getColumnSpec(nominalIndex), ignoreMissingValues);
    if (keys.size() == 0) {
        setWarningMessage("All classes are empty.");
    }
    int dataSetNr = 0;
    // iterate the column specs (not the keys) so that dataSetNr stays in
    // sync with the position inside colSpecs
    for (DataColumnSpec dcs : colSpecs) {
        String d = dcs.getName();
        if (data.get(d) == null || keys.size() == 0) {
            // no values were collected for this class
            dataSetNr++;
            continue;
        }
        exec.checkCanceled();
        exec.setProgress(dataSetNr / (double) keys.size(), "Creating statistics");
        Map<Double, Set<RowKey>> extremeOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        Map<Double, Set<RowKey>> mildOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        double[] stats = calculateStatistic(data.get(d), mildOutliers, extremeOutliers);
        double minimum = stats[BoxPlotNodeModel.MIN];
        double maximum = stats[BoxPlotNodeModel.MAX];
        // attach the observed [min, max] as domain of the class column
        DataColumnSpecCreator creator = new DataColumnSpecCreator(colSpecs[dataSetNr]);
        creator.setDomain(new DataColumnDomainCreator(new DoubleCell(minimum), new DoubleCell(maximum)).createDomain());
        colSpecs[dataSetNr] = creator.createSpec();
        m_statistics.put(colSpecs[dataSetNr], stats);
        m_mildOutliers.put(d, mildOutliers);
        m_extremeOutliers.put(d, extremeOutliers);
        dataSetNr++;
    }
    // an empty table carrying only the column specs backs the data array
    DataTableSpec dts = new DataTableSpec("MyTempTable", colSpecs);
    DataContainer cont = new DataContainer(dts);
    cont.close();
    m_dataArray = new DefaultDataArray(cont.getTable(), 1, 2);
    cont.dispose();
    if (ignoreMissingValues) {
        // insert a column for the missing value class at its sorted position
        DataColumnSpec[] temp = new DataColumnSpec[colSpecs.length + 1];
        DataColumnSpec missing = new DataColumnSpecCreator(DataType.getMissingCell().toString(), DataType.getMissingCell().getType()).createSpec();
        int i = 0;
        // the bounds check is required: without it an empty colSpecs array,
        // or a missing-cell name sorting behind every column name, runs
        // past the end of colSpecs
        while (i < colSpecs.length && missing.getName().compareTo(colSpecs[i].getName()) > 0) {
            temp[i] = colSpecs[i];
            i++;
        }
        temp[i++] = missing;
        while (i < temp.length) {
            temp[i] = colSpecs[i - 1];
            i++;
        }
        colSpecs = temp;
    }
    /* Save inSpec of the numeric column to provide the view a way to
     * consider the input domain for normalization. */
    m_numColSpec = inSpec.getColumnSpec(numericIndex);
    return new BufferedDataTable[] { createOutputTable(inSpec, colSpecs, exec).getTable() };
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class BoxPlotDrawingPane method paintOutlierLabels.
/**
 * Draws the numeric value next to every outlier dot. A dot is left
 * unlabeled when it shares its x coordinate with the most recently
 * labeled dot and lies less than one font height away from it, so that
 * labels do not overlap.
 *
 * @param g graphics.
 */
protected void paintOutlierLabels(final Graphics g) {
    final int lineHeight = g.getFontMetrics().getHeight();
    // the last dot that was not skipped
    DotInfo previous = null;
    for (DotInfo current : getDotInfoArray().getDots()) {
        final boolean sameColumn = previous != null && current.getXCoord() == previous.getXCoord();
        if (sameColumn && Math.abs(previous.getYCoord() - current.getYCoord()) < lineHeight) {
            // not enough vertical space for another label
            continue;
        }
        final int labelY = current.getYCoord() + lineHeight / 4;
        final int labelX = current.getXCoord() + DOT_SIZE;
        if (current.getYDomainValue() != null) {
            final double domainValue = ((DoubleValue) current.getYDomainValue()).getDoubleValue();
            final String text = LabelPaintUtil.getDoubleAsString(domainValue, Box.ROUNDING_FACTOR);
            g.drawString(text, labelX, labelY);
        }
        previous = current;
    }
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class BoxPlotNodeModel method detectOutliers.
/**
 * Detects mild and extreme outliers and determines the whisker values.
 * A value is an outlier if it lies more than 1.5 * IQR below the lower
 * or above the upper quartile. It is a mild outlier while it is less than
 * 3 * IQR away from that quartile and an extreme outlier otherwise.
 * Missing cells are skipped.
 *
 * @param table the sorted! table containing the values.
 * @param iqr the interquartile range
 * @param q quartiles the lower quartile at 0,upper quartile at 1.
 * @param mild map to store mild outliers: value to the keys of all rows
 *            having that value
 * @param extreme map to store extreme outliers: value to the keys of all
 *            rows having that value
 * @param whiskers array to store the lower (index 0) and upper (index 1)
 *            whisker bar
 * @param colIdx the index for the column of interest
 */
public void detectOutliers(final DataTable table, final double iqr, final double[] q, final Map<Double, Set<RowKey>> mild, final Map<Double, Set<RowKey>> extreme, final double[] whiskers, final int colIdx) {
    final double lowerWhiskerFence = q[0] - (1.5 * iqr);
    final double lowerExtremeFence = q[0] - (3.0 * iqr);
    final double upperWhiskerFence = q[1] + (1.5 * iqr);
    final double upperExtremeFence = q[1] + (3.0 * iqr);
    boolean searchLowerWhisker = true;
    boolean searchUpperWhisker = true;
    for (DataRow row : table) {
        DataCell cell = row.getCell(colIdx);
        if (cell.isMissing()) {
            continue;
        }
        double value = ((DoubleValue) cell).getDoubleValue();
        // the map this row has to be recorded in, null if it is no outlier
        Map<Double, Set<RowKey>> target = null;
        if (value < lowerWhiskerFence) {
            // lower outlier
            target = value > lowerExtremeFence ? mild : extreme;
        } else if (value > upperWhiskerFence) {
            // upper outlier; the upper whisker has been passed
            searchUpperWhisker = false;
            target = value < upperExtremeFence ? mild : extreme;
        } else if (searchLowerWhisker) {
            // first value that is not an outlier
            whiskers[0] = value;
            searchLowerWhisker = false;
        }
        if (target != null) {
            // look the key set up in the map it is stored in; taking it
            // from the other map would drop the keys of earlier rows
            // sharing the same value
            Set<RowKey> keys = target.get(value);
            if (keys == null) {
                keys = new HashSet<RowKey>();
                target.put(value, keys);
            }
            keys.add(row.getKey());
        }
        if (searchUpperWhisker) {
            // overwritten with every value seen before the first upper outlier
            whiskers[1] = value;
        }
    }
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class BoxplotCalculator method calculateMultiple.
/**
* Calculates the necessary statistics for a non-conditional boxplot.
* For every requested column the non-missing cells are copied into a
* temporary single-column table, which is then sorted and scanned to obtain
* minimum, maximum, quartiles, median, whiskers and outliers.
* As a side effect the fields <code>m_excludedDataCols</code> (columns
* without any non-missing value) and <code>m_numMissValPerCol</code>
* (number of missing values of the remaining columns, if greater than zero)
* are replaced.
* @param table the input data
* @param numCol array of names of numeric columns to plot
* @param exec Execution context to report progress to
* @return LinkedHashMap with the column name as key and statistics as value;
* columns without any non-missing value have no entry
* @throws CanceledExecutionException when the user cancels the execution
*/
public LinkedHashMap<String, BoxplotStatistics> calculateMultiple(final BufferedDataTable table, final String[] numCol, final ExecutionContext exec) throws CanceledExecutionException {
DataTableSpec spec = table.getSpec();
// resolve the column names to their indices in the input table
int[] numColIdxs = new int[numCol.length];
for (int i = 0; i < numCol.length; i++) {
numColIdxs[i] = spec.findColumnIndex(numCol[i]);
}
// one temporary container with a single double column per requested column
LinkedHashMap<String, DataContainer> containers = new LinkedHashMap<String, DataContainer>();
for (int i = 0; i < numCol.length; i++) {
containers.put(numCol[i], exec.createDataContainer(new DataTableSpec(new String[] { "col" }, new DataType[] { DoubleCell.TYPE })));
}
ExecutionContext subExec = exec.createSilentSubExecutionContext(0.7);
long[] numMissValPerCol = new long[numCol.length];
int count = 0;
// first pass: distribute the non-missing cells to the containers and
// count the missing ones per column
for (DataRow row : table) {
exec.checkCanceled();
subExec.setProgress((double) count++ / table.size());
for (int i = 0; i < numCol.length; i++) {
DataCell cell = row.getCell(numColIdxs[i]);
if (!cell.isMissing()) {
containers.get(numCol[i]).addRowToTable(new DefaultRow(row.getKey(), cell));
} else {
numMissValPerCol[i]++;
}
}
}
LinkedHashMap<String, BoxplotStatistics> statsMap = new LinkedHashMap<>();
ExecutionContext subExec2 = exec.createSilentSubExecutionContext(1.0);
count = 0;
// names of the columns that contain no non-missing value at all
List<String> excludedDataColList = new ArrayList<String>();
// second pass: compute the statistics column by column
for (Entry<String, DataContainer> entry : containers.entrySet()) {
exec.checkCanceled();
subExec2.setProgress((double) count++ / containers.size());
Set<Outlier> extremeOutliers = new HashSet<Outlier>();
Set<Outlier> mildOutliers = new HashSet<Outlier>();
entry.getValue().close();
BufferedDataTable catTable = (BufferedDataTable) entry.getValue().getTable();
if (catTable.size() == 0) {
// nothing to compute, remember the column as excluded
excludedDataColList.add(entry.getKey());
continue;
}
// sort the values; the comparator orders rows by the double value of
// their only cell, smaller values first
// NOTE(review): meaning of the boolean argument of SortedTable is not
// visible here - confirm
SortedTable st = new SortedTable(catTable, new Comparator<DataRow>() {
@Override
public int compare(final DataRow o1, final DataRow o2) {
DataCell c1 = o1.getCell(0);
DataCell c2 = o2.getCell(0);
double d1 = ((DoubleValue) c1).getDoubleValue();
double d2 = ((DoubleValue) c2).getDoubleValue();
if (d1 == d2) {
return 0;
} else {
return d1 < d2 ? -1 : 1;
}
}
}, false, exec);
double min = 0, max = 0, q1 = 0, q3 = 0, median = 0;
// the d* flags state that the respective position falls between two rows,
// in which case the value is the mean of the rows at index - 1 and index
boolean dq1 = catTable.size() % 4 == 0;
long q1Idx = catTable.size() / 4;
boolean dq3 = 3 * catTable.size() % 4 == 0;
long q3Idx = 3 * catTable.size() / 4;
boolean dMedian = catTable.size() % 2 == 0;
long medianIdx = catTable.size() / 2;
int counter = 0;
// single scan over the sorted values picking up all statistics
for (DataRow row : st) {
double val = ((DoubleValue) row.getCell(0)).getDoubleValue();
if (counter == 0) {
min = val;
}
if (counter == catTable.size() - 1) {
max = val;
}
// remember the row before the quartile position if averaging is needed
if (counter == q1Idx - 1 && dq1) {
q1 = val;
}
// for tables with at most 3 rows the first row is (also) used for q1
if (counter == q1Idx || (counter == 0 && st.size() <= 3)) {
if (dq1) {
q1 = (q1 + val) / 2.0;
} else {
q1 = val;
}
}
if (counter == medianIdx - 1 && dMedian) {
median = val;
}
if (counter == medianIdx) {
if (dMedian) {
median = (median + val) / 2;
} else {
median = val;
}
}
if (counter == q3Idx - 1 && dq3) {
q3 = val;
}
// for tables with at most 3 rows the last row is (also) used for q3
if (counter == q3Idx || (counter == st.size() - 1 && st.size() <= 3)) {
if (dq3) {
q3 = (q3 + val) / 2.0;
} else {
q3 = val;
}
}
counter++;
}
double iqr = q3 - q1;
// whiskers start at the extremes and are pulled inside the whisker fences
// (1.5 * IQR) below; values beyond the outer fences (3 * IQR) are extreme
double lowerWhisker = min;
double upperWhisker = max;
double upperWhiskerFence = q3 + (1.5 * iqr);
double lowerWhiskerFence = q1 - (1.5 * iqr);
double lowerFence = q1 - (3 * iqr);
double upperFence = q3 + (3 * iqr);
// the order of the branches matters: each value is handled by the first
// matching condition only
for (DataRow row : st) {
double value = ((DoubleValue) row.getCell(0)).getDoubleValue();
String rowKey = row.getKey().getString();
if (value < lowerFence) {
extremeOutliers.add(new Outlier(value, rowKey));
} else if (value < lowerWhiskerFence) {
mildOutliers.add(new Outlier(value, rowKey));
} else if (lowerWhisker < lowerWhiskerFence && value >= lowerWhiskerFence) {
// first value inside the lower whisker fence while the current lower
// whisker is still outside of it
lowerWhisker = value;
} else if (value <= upperWhiskerFence) {
// keeps the last value seen that does not exceed the upper whisker fence
upperWhisker = value;
} else if (value > upperFence) {
extremeOutliers.add(new Outlier(value, rowKey));
} else if (value > upperWhiskerFence) {
mildOutliers.add(new Outlier(value, rowKey));
}
}
statsMap.put(entry.getKey(), new BoxplotStatistics(mildOutliers, extremeOutliers, min, max, lowerWhisker, q1, median, q3, upperWhisker));
}
// missing values part
m_excludedDataCols = excludedDataColList.toArray(new String[excludedDataColList.size()]);
m_numMissValPerCol = new LinkedHashMap<String, Long>();
// report missing value counts only for columns that were not excluded
for (int i = 0; i < numCol.length; i++) {
if (numMissValPerCol[i] > 0 && !excludedDataColList.contains(numCol[i])) {
m_numMissValPerCol.put(numCol[i], numMissValPerCol[i]);
}
}
return statsMap;
}
Aggregations