Use of org.knime.core.data.DoubleValue in project knime-core by knime.
The class RuleSetToTable, method toString.
/**
 * Renders a {@link DataCell} as the {@link String} used for it inside a rule.
 * <ul>
 * <li>A missing cell becomes the quoted question mark ({@code "?"}).</li>
 * <li>A {@link StringValue} has its string value passed through {@code escapedText}.</li>
 * <li>A {@link BooleanValue} becomes the upper-cased boolean literal.</li>
 * <li>A {@link DoubleValue} is returned as its plain {@code toString()} result, without escaping.</li>
 * <li>Any other cell has its {@code toString()} result passed through {@code escapedText}.</li>
 * </ul>
 * The checks are made in exactly this order, so a cell implementing several of the value interfaces is handled
 * by the first matching case.
 *
 * @param cell A {@link DataCell}.
 * @return The value of {@code cell} as a {@link String}, escaped where required.
 */
public static String toString(final DataCell cell) {
    if (cell.isMissing()) {
        return "\"?\"";
    }
    if (cell instanceof StringValue) {
        return escapedText(((StringValue) cell).getStringValue());
    }
    if (cell instanceof BooleanValue) {
        final boolean flag = ((BooleanValue) cell).getBooleanValue();
        return Boolean.toString(flag).toUpperCase();
    }
    // Numeric cells are emitted verbatim, everything else has to be escaped.
    final String text = cell.toString();
    return cell instanceof DoubleValue ? text : escapedText(text);
}
Use of org.knime.core.data.DoubleValue in project knime-core by knime.
The class HistogramColumn, method constructFromDataArray.
/**
 * Constructs the helper data structures from the numeric histogram models and the data given as a
 * {@link DataTable}.
 * <p>
 * The first map of the result is keyed by column index and then by bin index; it only contains columns that are
 * compatible with {@link DoubleValue} and have a non-{@code null} model in {@code histograms}. The second map is
 * keyed by column index and then by cell value; it contains every column whose domain has values or whose name
 * is listed in {@code nominalColumnNames}. Missing cells are not recorded in either map.
 *
 * @param histograms The numeric histograms, keyed by column index.
 * @param data The input data.
 * @param nominalColumnNames The nominal column names.
 * @return The helper data structures: (numeric row keys per bin, nominal row keys per value).
 * @see #construct(Map, DataTable, Set)
 */
protected static Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> constructFromDataArray(
    final Map<Integer, HistogramNumericModel> histograms, final DataTable data,
    final Set<String> nominalColumnNames) {
    final Map<Integer, Map<Integer, Set<RowKey>>> rowKeysByBin =
        new HashMap<Integer, Map<Integer, Set<RowKey>>>();
    final Map<Integer, Map<DataValue, Set<RowKey>>> rowKeysByValue =
        new HashMap<Integer, Map<DataValue, Set<RowKey>>>();

    // Pass 1: decide per column which of the two look-ups it takes part in (possibly both).
    final DataTableSpec spec = data.getDataTableSpec();
    for (DataColumnSpec column : spec) {
        final String name = column.getName();
        final int index = spec.findColumnIndex(name);
        final boolean numeric = column.getType().isCompatible(DoubleValue.class);
        if (numeric && histograms.get(Integer.valueOf(index)) != null) {
            rowKeysByBin.put(index, new HashMap<Integer, Set<RowKey>>());
        }
        if (column.getDomain().hasValues() || nominalColumnNames.contains(name)) {
            rowKeysByValue.put(index, new HashMap<DataValue, Set<RowKey>>());
        }
    }

    // Pass 2: file every row key under the bin / value of each participating column.
    for (DataRow row : data) {
        for (Entry<Integer, Map<Integer, Set<RowKey>>> numericEntry : rowKeysByBin.entrySet()) {
            final Integer column = numericEntry.getKey();
            final DataCell cell = row.getCell(column.intValue());
            if (!(cell instanceof DoubleValue)) {
                // cells without a numeric value cannot be assigned to a bin
                continue;
            }
            final Integer bin = Integer.valueOf(histograms.get(column).findBin((DoubleValue) cell));
            final Map<Integer, Set<RowKey>> bins = numericEntry.getValue();
            Set<RowKey> keys = bins.get(bin);
            if (keys == null) {
                keys = new HashSet<RowKey>();
                bins.put(bin, keys);
            }
            keys.add(row.getKey());
        }
        for (Entry<Integer, Map<DataValue, Set<RowKey>>> nominalEntry : rowKeysByValue.entrySet()) {
            final DataCell cell = row.getCell(nominalEntry.getKey().intValue());
            if (cell.isMissing()) {
                continue;
            }
            final Map<DataValue, Set<RowKey>> values = nominalEntry.getValue();
            Set<RowKey> keys = values.get(cell);
            if (keys == null) {
                keys = new HashSet<RowKey>();
                values.put(cell, keys);
            }
            keys.add(row.getKey());
        }
    }
    return Pair.create(rowKeysByBin, rowKeysByValue);
}
Use of org.knime.core.data.DoubleValue in project knime-core by knime.
The class HistogramColumn, method loadHistograms.
/**
 * Loads the histograms from the saved internal files and rebuilds the row key look-ups from the saved data.
 * <p>
 * The numeric models and the set of numeric column indices come from {@code loadHistogramsPrivate}; the row keys
 * are then re-assigned to bins by iterating the table read from {@code dataArrayGz}. For the nominal columns every
 * cell value of a row is recorded as a key, including missing cells (no missing check is made for them here).
 * Names in {@code nominalColumns} that are not found in the table are skipped.
 *
 * @param histogramsGz The file for the histograms.
 * @param dataArrayGz The data array file for the row keys.
 * @param nominalColumns The nominal columns.
 * @param strategy The strategy used to compute the bins.
 * @param means The mean values for the numeric columns.
 * @return A triple (Pair(Pair(,),)) of histograms, numeric and nominal row keys.
 * @throws IOException Failed to read the files.
 * @throws InvalidSettingsException Something went wrong.
 */
public static Pair<Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> loadHistograms(
    final File histogramsGz, final File dataArrayGz, final Set<String> nominalColumns,
    final BinNumberSelectionStrategy strategy, final double[] means)
    throws IOException, InvalidSettingsException {
    final Map<Integer, Map<Integer, Set<RowKey>>> rowKeysByBin =
        new HashMap<Integer, Map<Integer, Set<RowKey>>>();
    final Map<Integer, HistogramNumericModel> models =
        loadHistogramsPrivate(histogramsGz, rowKeysByBin, strategy, means);
    final Map<Integer, Map<DataValue, Set<RowKey>>> rowKeysByValue =
        new HashMap<Integer, Map<DataValue, Set<RowKey>>>();
    final ContainerTable table = DataContainer.readFromZip(dataArrayGz);

    // Register an (initially empty) value map for each nominal column that exists in the table.
    for (String nominalColumn : nominalColumns) {
        final int index = table.getDataTableSpec().findColumnIndex(nominalColumn);
        if (index >= 0) {
            rowKeysByValue.put(Integer.valueOf(index), new HashMap<DataValue, Set<RowKey>>());
        }
    }

    for (DataRow row : table) {
        // numeric columns: the row key goes into the bin its value falls into
        for (Entry<Integer, Map<Integer, Set<RowKey>>> numericEntry : rowKeysByBin.entrySet()) {
            final Integer column = numericEntry.getKey();
            final HistogramNumericModel model = models.get(column);
            final Map<Integer, Set<RowKey>> bins = numericEntry.getValue();
            final DataCell cell = row.getCell(column.intValue());
            if (cell.isMissing() || !(cell instanceof DoubleValue)) {
                continue;
            }
            final Integer bin = Integer.valueOf(model.findBin((DoubleValue) cell));
            Set<RowKey> keys = bins.get(bin);
            if (keys == null) {
                keys = new HashSet<RowKey>();
                bins.put(bin, keys);
            }
            keys.add(row.getKey());
        }
        // nominal columns: the row key goes under the cell itself
        for (Entry<Integer, Map<DataValue, Set<RowKey>>> nominalEntry : rowKeysByValue.entrySet()) {
            final DataCell cell = row.getCell(nominalEntry.getKey().intValue());
            final Map<DataValue, Set<RowKey>> values = nominalEntry.getValue();
            Set<RowKey> keys = values.get(cell);
            if (keys == null) {
                keys = new HashSet<RowKey>();
                values.put(cell, keys);
            }
            keys.add(row.getKey());
        }
    }

    final Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>> numericPart =
        new Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>>(models,
            rowKeysByBin);
    return Pair.create(numericPart, rowKeysByValue);
}
Use of org.knime.core.data.DoubleValue in project knime-core by knime.
The class SortedCorrelationComputer, method calculateKendall.
/**
 * Calculates a Kendall-type rank correlation for all pairs of data table columns based on the previously
 * calculated ranks.
 * <p>
 * For every column pair {@code (i, j)} with {@code i < j} all row pairs are classified as concordant (C),
 * discordant (D), tied only in column {@code j} (Ty) or tied only in column {@code i} (Tx). Every unordered row
 * pair contributes to exactly one of these counts exactly once. Depending on {@code corrType} the result is
 * <ul>
 * <li>Kendall's tau a: {@code (C - D) / (n * (n - 1) / 2)},</li>
 * <li>Kendall's tau b: {@code (C - D) / (sqrt(C + D + Tx) * sqrt(C + D + Ty))},</li>
 * <li>Kruskal's gamma: {@code (C - D) / (C + D)}.</li>
 * </ul>
 * A zero denominator yields a non-finite value (floating point division). For any other {@code corrType} no
 * value is written into the returned matrix.
 * <p>
 * The first 95% of the progress are reported while the row pairs are counted, the remaining 5% while the
 * coefficients are derived. The run time is quadratic in the number of rows.
 *
 * @param corrType the type of correlation used, as defined in CorrelationComputeNodeModel
 * @param exec the execution monitor used for progress reporting and cancellation checks
 * @return the output matrix to be turned into the output model
 * @throws CanceledExecutionException if canceled by users
 */
HalfDoubleMatrix calculateKendall(final String corrType, final ExecutionMonitor exec) throws CanceledExecutionException {
    // the ranking must have been calculated before
    assert (m_rank != null);
    final int coCount = m_rank.getDataTableSpec().getNumColumns();
    final HalfDoubleMatrix nominatorMatrix = new HalfDoubleMatrix(coCount, /*includeDiagonal=*/ false);
    // Only the upper triangle (j > i) of these count matrices is filled and read.
    final double[][] cMatrix = new double[coCount][coCount];
    final double[][] dMatrix = new double[coCount][coCount];
    final double[][] txMatrix = new double[coCount][coCount];
    final double[][] tyMatrix = new double[coCount][coCount];
    final DataCell[] cells = new DataCell[coCount];
    final DataCell[] cells2 = new DataCell[coCount];
    int rowIndex = 0;
    final int rowCount = m_rank.getRowCount();
    for (DataRow r : m_rank) {
        // the cells of the outer row are accessed once per inner row, so we buffer them
        for (int i = 0; i < cells.length; i++) {
            cells[i] = r.getCell(i);
        }
        for (DataRow r2 : m_rank) {
            exec.checkCanceled();
            // the cells of the inner row are accessed once per column pair, so we buffer them
            for (int i = 0; i < cells2.length; i++) {
                cells2[i] = r2.getCell(i);
            }
            for (int i = 0; i < coCount; i++) {
                final double x1 = ((DoubleValue) cells[i]).getDoubleValue();
                final double x2 = ((DoubleValue) cells2[i]).getDoubleValue();
                for (int j = i + 1; j < coCount; j++) {
                    final double y1 = ((DoubleValue) cells[j]).getDoubleValue();
                    final double y2 = ((DoubleValue) cells2[j]).getDoubleValue();
                    // Both orderings (r, r2) and (r2, r) of a row pair are visited. Each case below matches
                    // only one of the two orderings, so every unordered pair is counted exactly once.
                    if (x1 < x2) {
                        if (y1 < y2) {
                            // values are concordant
                            cMatrix[i][j]++;
                        } else if (y1 > y2) {
                            // values are discordant
                            dMatrix[i][j]++;
                        } else if (y1 == y2) {
                            // values are tied in y only
                            tyMatrix[i][j]++;
                        }
                    } else if (x1 == x2 && y1 < y2) {
                        // values are tied in x only
                        txMatrix[i][j]++;
                    }
                    // pairs tied in both x and y are not needed by any of the measures
                }
            }
        }
        exec.checkCanceled();
        exec.setProgress(0.95 * rowIndex / rowCount, String.format("Calculating - %d/%d (\"%s\")", rowIndex, rowCount, r.getKey()));
        rowIndex++;
    }
    if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLA)) {
        // kendalls Tau a: normalize by the number of row pairs
        final double nrOfRows = rowCount;
        final double divisor = (nrOfRows * (nrOfRows - 1.0)) * 0.5;
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / divisor);
            }
            // continue after the 95% already reported by the counting phase
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLB)) {
        // kendalls Tau b: normalize by the pairs not tied in x and the pairs not tied in y
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                final double untied = cMatrix[i][j] + dMatrix[i][j];
                final double div = Math.sqrt(untied + txMatrix[i][j]) * Math.sqrt(untied + tyMatrix[i][j]);
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / div);
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KRUSKALAL)) {
        // Kruskals Gamma: ties are ignored completely
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / (cMatrix[i][j] + dMatrix[i][j]));
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    }
    return nominatorMatrix;
}
Use of org.knime.core.data.DoubleValue in project knime-core by knime.
The class TwoSampleTTest, method execute.
/**
 * Feeds every row of {@code table} into one {@link TwoSampleTTestStatistics} per configured test column.
 * <p>
 * The group of a row is looked up from its cell in the grouping column. Rows without a group are registered on
 * every statistics object as "missing group" (the grouping cell is missing) or "ignored group" (any other value).
 * For rows with a group, each non-missing test cell is added as a value for that group, and each missing test
 * cell is registered as missing for that group.
 *
 * @param table the input table; the test columns are expected to hold {@link DoubleValue} cells
 * @param exec used for progress reporting and cancellation checks
 * @return one statistics object per test column, in the order of the configured test columns
 * @throws InvalidSettingsException if the grouping column or one of the test columns is not part of the table
 * @throws CanceledExecutionException if the execution was canceled
 */
public TwoSampleTTestStatistics[] execute(final BufferedDataTable table, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    final DataTableSpec spec = table.getDataTableSpec();
    final int groupingIndex = spec.findColumnIndex(m_grouping.getColumn());
    if (groupingIndex == -1) {
        throw new InvalidSettingsException("Grouping column not found.");
    }
    final int testColumnCount = m_testColumns.length;
    final int[] testColumnsIndex = new int[testColumnCount];
    for (int i = 0; i < testColumnCount; i++) {
        testColumnsIndex[i] = spec.findColumnIndex(m_testColumns[i]);
        if (testColumnsIndex[i] < 0) {
            // fail early with a settings error instead of an index error while iterating the rows
            throw new InvalidSettingsException("Test column \"" + m_testColumns[i] + "\" not found.");
        }
    }
    final TwoSampleTTestStatistics[] result = new TwoSampleTTestStatistics[testColumnCount];
    for (int i = 0; i < testColumnCount; i++) {
        result[i] = new TwoSampleTTestStatistics(m_testColumns[i], m_grouping.getGroupLabels(), m_confidenceIntervalProb);
    }
    final int rowCount = table.getRowCount();
    int rowIndex = 0;
    for (DataRow row : table) {
        exec.checkCanceled();
        // the fraction uses the 0-based index, the message shows the 1-based row number
        final double progress = rowIndex / (double) rowCount;
        rowIndex++;
        exec.setProgress(progress, rowIndex + "/" + rowCount + " (\"" + row.getKey() + "\")");
        final DataCell groupCell = row.getCell(groupingIndex);
        final Group group = m_grouping.getGroup(groupCell);
        if (group == null) {
            // the row belongs to none of the groups: record the reason on every test column
            final boolean missingGroup = groupCell.isMissing();
            for (int i = 0; i < testColumnCount; i++) {
                if (missingGroup) {
                    result[i].addMissingGroup();
                } else {
                    result[i].addIgnoredGroup();
                }
            }
            continue;
        }
        for (int i = 0; i < testColumnCount; i++) {
            final DataCell cell = row.getCell(testColumnsIndex[i]);
            if (cell.isMissing()) {
                result[i].addMissing(group);
            } else {
                result[i].addValue(((DoubleValue) cell).getDoubleValue(), group);
            }
        }
    }
    return result;
}
Aggregations