Example usage of org.knime.base.util.HalfDoubleMatrix in the knime-core project (by KNIME):
class SortedCorrelationComputer, method calculateKendall.
/**
 * Calculates the Kendall rank correlation for all pairs of data table columns based on
 * previously calculated ranks (stored in {@code m_rank}).
 *
 * <p>Performs a full pairwise pass over the rank table (O(n&sup2;) in the row count) and
 * tallies concordant, discordant, and tied pairs for every column pair, then combines the
 * counts according to the requested correlation measure (Tau-a, Tau-b, or Kruskal's Gamma).
 *
 * @param corrType the type of correlation used, as defined in CorrelationComputeNodeModel
 * @param exec the Execution context.
 * @return the output matrix to be turned into the output model
 * @throws CanceledExecutionException if canceled by users
 */
HalfDoubleMatrix calculateKendall(final String corrType, final ExecutionMonitor exec) throws CanceledExecutionException {
// the ranking must have been calculated before
assert (m_rank != null);
final int coCount = m_rank.getDataTableSpec().getNumColumns();
// upper-triangular result matrix; diagonal excluded (self-correlation is trivially 1)
HalfDoubleMatrix nominatorMatrix = new HalfDoubleMatrix(coCount, /*includeDiagonal=*/
false);
// per-column-pair counters: concordant, discordant, tied-in-x, tied-in-y
double[][] cMatrix = new double[coCount][coCount];
double[][] dMatrix = new double[coCount][coCount];
double[][] txMatrix = new double[coCount][coCount];
double[][] tyMatrix = new double[coCount][coCount];
// double[][] txyMatrix = new double[coCount][coCount];
final DataCell[] cells = new DataCell[m_rank.getDataTableSpec().getNumColumns()];
final DataCell[] cells2 = new DataCell[m_rank.getDataTableSpec().getNumColumns()];
int rowIndex = 0;
final int rowCount = m_rank.getRowCount();
// NOTE(review): every unordered row pair is visited in BOTH orders (r, r2) and (r2, r).
// The c/d counters only increment when x1 < x2, i.e. once per unordered pair, whereas the
// tie counters tx/ty use symmetric conditions and therefore increment twice per unordered
// pair. Verify this asymmetry is intended for the Tau-b divisor below.
for (DataRow r : m_rank) {
// multiple times, so we buffer it
for (int i = 0; i < cells.length; i++) {
cells[i] = r.getCell(i);
}
for (DataRow r2 : m_rank) {
exec.checkCanceled();
// multiple times, so we buffer it
for (int i = 0; i < cells2.length; i++) {
cells2[i] = r2.getCell(i);
}
for (int i = 0; i < coCount; i++) {
final double x1 = ((DoubleValue) cells[i]).getDoubleValue();
final double x2 = ((DoubleValue) cells2[i]).getDoubleValue();
for (int j = 0; j < coCount; j++) {
final double y1 = ((DoubleValue) cells[j]).getDoubleValue();
final double y2 = ((DoubleValue) cells2[j]).getDoubleValue();
if (x1 < x2 && y1 < y2) {
// values are concordant
cMatrix[i][j]++;
} else if (x1 < x2 && y1 > y2) {
// values are discordant
dMatrix[i][j]++;
} else if (x1 != x2 && y1 == y2) {
// values are bounded in y
tyMatrix[i][j]++;
} else if (x1 == x2 && y1 != y2) {
// values are bounded in x
txMatrix[i][j]++;
} else {
// (x1 == x2 && y1 == y2) { values are bounded in x and y
// txyMatrix[i][j]++; // no measure need this count
}
}
}
}
exec.checkCanceled();
exec.setProgress(0.95 * rowIndex / rowCount, String.format("Calculating - %d/%d (\"%s\")", rowIndex, rowCount, r.getKey()));
rowIndex++;
}
if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLA)) {
double nrOfRows = m_rank.getRowCount();
// kendalls Tau a: (C - D) / (n * (n - 1) / 2), ignores ties
double divisor = (nrOfRows * (nrOfRows - 1.0)) * 0.5;
for (int i = 0; i < coCount; i++) {
for (int j = i + 1; j < coCount; j++) {
nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / divisor);
}
exec.setProgress(0.05 * i / coCount, "Calculating correlations");
}
} else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLB)) {
// kendalls Tau b: (C - D) / sqrt((C + D + Tx) * (C + D + Ty)), corrects for ties
for (int i = 0; i < coCount; i++) {
for (int j = i + 1; j < coCount; j++) {
double div = Math.sqrt(cMatrix[i][j] + dMatrix[i][j] + txMatrix[i][j]) * Math.sqrt(cMatrix[i][j] + dMatrix[i][j] + tyMatrix[i][j]);
nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / div);
}
exec.setProgress(0.05 * i / coCount, "Calculating correlations");
}
} else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KRUSKALAL)) {
// Kruskals Gamma: (C - D) / (C + D), ties excluded from the denominator
for (int i = 0; i < coCount; i++) {
for (int j = i + 1; j < coCount; j++) {
nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / (cMatrix[i][j] + dMatrix[i][j]));
}
exec.setProgress(0.05 * i / coCount, "Calculating correlations");
}
}
// NOTE(review): an unrecognized corrType silently returns an all-zero matrix — confirm
// callers restrict corrType to the three handled constants.
return nominatorMatrix;
}
Example usage of org.knime.base.util.HalfDoubleMatrix in the knime-core project (by KNIME):
class StandCronbachNodeModel, method execute.
/**
 * {@inheritDoc}
 *
 * Computes standardized Cronbach's alpha from the mean of the pairwise correlations in the
 * input model: alpha = (k * rMean) / (1 + (k - 1) * rMean), where k is the number of items.
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    PMCCPortObjectAndSpec model = (PMCCPortObjectAndSpec) inData[0];
    HalfDoubleMatrix mat = model.getCorrelationMatrix();
    double sum = 0;
    // integer counter of upper-triangle entries (was a double; an int is the natural type)
    int count = 0;
    // average the strict upper triangle, i.e. all distinct column pairs
    for (int i = 0; i < mat.getRowCount(); i++) {
        for (int j = i + 1; j < mat.getRowCount(); j++) {
            // read each entry once instead of twice
            final double value = mat.get(i, j);
            if (Double.isNaN(value)) {
                // fixed typos in the user-facing message ("NAN" -> "NaN", "meassure" -> "measure")
                throw new IOException("No NaN values supported for the calculation, "
                    + "try using an alternative correlation measure");
            }
            sum += value;
            count++;
        }
    }
    // NOTE(review): a model with fewer than two columns yields count == 0 and a NaN result —
    // presumably upstream guarantees at least two columns; verify.
    double mean = sum / count;
    double cronbach = (mat.getRowCount() * mean) / (1 + (mat.getRowCount() - 1) * mean);
    BufferedDataContainer out = exec.createDataContainer(getDataTableSpec());
    RowKey k = new RowKey("Cronbach");
    DataRow r = new DefaultRow(k, new DoubleCell(cronbach));
    out.addRowToTable(r);
    out.close();
    return new BufferedDataTable[] { out.getTable() };
}
Example usage of org.knime.base.util.HalfDoubleMatrix in the knime-core project (by KNIME):
class CorrelationComputeNodeModel, method execute.
/**
 * {@inheritDoc}
 *
 * Filters the input table to the selected columns, runs the two-pass correlation
 * computation (statistics, then correlation values), assembles the output table and
 * model, and surfaces any missing-value / constant-column warnings to the user.
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable dataTable = (BufferedDataTable) inData[0];
    final DataTableSpec spec = dataTable.getDataTableSpec();
    // restrict the table to the columns chosen in the dialog
    String[] includes = m_columnFilterModel.applyTo(spec).getIncludes();
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    rearranger.keepOnly(includes);
    final BufferedDataTable filtered = exec.createColumnRearrangeTable(dataTable, rearranger, exec.createSilentSubExecutionContext(0.0));
    final DataTableSpec filteredSpec = filtered.getDataTableSpec();
    // progress budget: two 48% scan phases plus a 4% output-assembly phase
    double statsShare = 0.48;
    double corrShare = 0.48;
    double finishShare = 1.0 - statsShare - corrShare;
    CorrelationComputer computer = new CorrelationComputer(filteredSpec, m_maxPossValueCountModel.getIntValue());
    exec.setMessage("Calculating table statistics");
    ExecutionContext statsExec = exec.createSubExecutionContext(statsShare);
    computer.calculateStatistics(filtered, statsExec);
    statsExec.setProgress(1.0);
    exec.setMessage("Calculating correlation values");
    ExecutionMonitor corrExec = exec.createSubExecutionContext(corrShare);
    HalfDoubleMatrix correlationMatrix = computer.calculateOutput(filtered, corrExec);
    corrExec.setProgress(1.0);
    exec.setMessage("Assembling output");
    ExecutionContext finishExec = exec.createSubExecutionContext(finishShare);
    PMCCPortObjectAndSpec portModel = new PMCCPortObjectAndSpec(includes, correlationMatrix);
    BufferedDataTable outTable = portModel.createCorrelationMatrix(finishExec);
    m_correlationTable = outTable;
    // collect user-facing warnings; the full (long) variants go to the debug log only
    String warningText = null;
    String missingWarning = computer.getNumericMissingValueWarning(4);
    if (missingWarning != null) {
        LOGGER.debug(computer.getNumericMissingValueWarning(1000));
        warningText = missingWarning;
    }
    String constantWarning = computer.getNumericConstantColumnPairs(4);
    if (constantWarning != null) {
        LOGGER.debug(computer.getNumericConstantColumnPairs(1000));
        warningText = (warningText == null) ? constantWarning : warningText + "\n" + constantWarning;
    }
    if (warningText != null) {
        setWarningMessage(warningText);
    }
    return new PortObject[] { outTable, portModel };
}
Example usage of org.knime.base.util.HalfDoubleMatrix in the knime-core project (by KNIME):
class CorrelationComputer, method calculateOutput.
/**
 * Second scan on data. Computes the pair wise correlation for numeric
 * columns and reads the contingency tables of pairs of categorical
 * columns into memory.
 *
 * @param table the table to scan; must have the same structure as the spec this
 *            computer was created with
 * @param exec monitor for progress reporting and cancellation
 * @return the output matrix to be turned into the output model
 * @throws CanceledExecutionException if the execution is canceled by the user
 */
public HalfDoubleMatrix calculateOutput(final BufferedDataTable table, final ExecutionMonitor exec) throws CanceledExecutionException {
    assert table.getDataTableSpec().equalStructure(m_tableSpec);
    int catCount = m_categoricalColIndexMap.length;
    int categoricalPairsCount = (catCount - 1) * catCount / 2;
    // stores all pair-wise contingency tables,
    // contingencyTables[i] == null <--> either column of the corresponding
    // pair has more than m_maxPossibleValues values
    // http://en.wikipedia.org/wiki/Contingency_table
    int[][][] contingencyTables = new int[categoricalPairsCount][][];
    int valIndex = 0;
    // allocate one table per categorical pair whose value sets are both known
    for (int i = 0; i < m_categoricalColIndexMap.length; i++) {
        for (int j = i + 1; j < m_categoricalColIndexMap.length; j++) {
            LinkedHashMap<DataCell, Integer> valuesI = m_possibleValues[i];
            LinkedHashMap<DataCell, Integer> valuesJ = m_possibleValues[j];
            if (valuesI != null && valuesJ != null) {
                int iSize = valuesI.size();
                int jSize = valuesJ.size();
                contingencyTables[valIndex] = new int[iSize][jSize];
            }
            valIndex++;
        }
    }
    final int numColumns = m_tableSpec.getNumColumns();
    HalfDoubleMatrix nominatorMatrix = new HalfDoubleMatrix(numColumns, /*includeDiagonal=*/
    false);
    nominatorMatrix.fill(Double.NaN);
    long rowIndex = 0;
    DataCell[] cells = new DataCell[numColumns];
    final long rowCount = table.size();
    // pre-mark numeric pairs: 0.0 where a correlation will be accumulated, NaN where
    // a constant column (overall or under pairwise-missing filtering) makes it undefined
    for (int i = 0; i < m_numericColIndexMap.length; i++) {
        final double stdDevI = m_numericStdDevMatrix[i][i];
        if (stdDevI == 0.0) {
            for (int j = i + 1; j < m_numericColIndexMap.length; j++) {
                nominatorMatrix.set(m_numericColIndexMap[i], m_numericColIndexMap[j], Double.NaN);
            }
            m_numericsWithConstantValues.add(new Pair<Integer, Integer>(m_numericColIndexMap[i], null));
        } else {
            for (int j = i + 1; j < m_numericColIndexMap.length; j++) {
                nominatorMatrix.set(m_numericColIndexMap[i], m_numericColIndexMap[j], 0.0);
                final double stdDevJ = m_numericStdDevMatrix[j][j];
                if (stdDevJ == 0.0) {
                    nominatorMatrix.set(m_numericColIndexMap[i], m_numericColIndexMap[j], Double.NaN);
                    // rest is fixed when j becomes the current value
                    // in the outer loop
                } else {
                    double stdDevIUnderJ = m_numericStdDevMatrix[i][j];
                    double stdDevJUnderI = m_numericStdDevMatrix[j][i];
                    if (stdDevIUnderJ == 0.0) {
                        // all values in column i where j is not missing
                        // are constant
                        m_numericsWithConstantValues.add(new Pair<Integer, Integer>(m_numericColIndexMap[i], m_numericColIndexMap[j]));
                        nominatorMatrix.set(m_numericColIndexMap[i], m_numericColIndexMap[j], Double.NaN);
                    }
                    if (stdDevJUnderI == 0.0) {
                        // all values in column j where i is not missing
                        // are constant
                        m_numericsWithConstantValues.add(new Pair<Integer, Integer>(m_numericColIndexMap[j], m_numericColIndexMap[i]));
                        nominatorMatrix.set(m_numericColIndexMap[i], m_numericColIndexMap[j], Double.NaN);
                    }
                }
            }
        }
    }
    // single pass over the data: accumulate standardized cross products for numeric pairs
    // and fill the contingency tables for categorical pairs
    for (DataRow r : table) {
        for (int i = 0; i < cells.length; i++) {
            cells[i] = r.getCell(i);
        }
        for (int i = 0; i < m_numericColIndexMap.length; i++) {
            final DataCell ci = cells[m_numericColIndexMap[i]];
            if (ci.isMissing()) {
                continue;
            }
            if (m_numericStdDevMatrix[i][i] == 0.0) {
                // constant column, reported above
                continue;
            }
            final double di = ((DoubleValue) ci).getDoubleValue();
            for (int j = i + 1; j < m_numericColIndexMap.length; j++) {
                final DataCell cj = cells[m_numericColIndexMap[j]];
                if (cj.isMissing()) {
                    continue;
                }
                // mean/stddev of column i restricted to rows where column j is present
                // (and vice versa), as prepared by the first scan
                final double meanI = m_numericMeanMatrix[i][j];
                final double stdDevI = m_numericStdDevMatrix[i][j];
                final double meanJ = m_numericMeanMatrix[j][i];
                final double stdDevJ = m_numericStdDevMatrix[j][i];
                if (stdDevI == 0.0 || stdDevJ == 0.0) {
                    // reported above
                    continue;
                }
                final double vi = (di - meanI) / stdDevI;
                final double dj = ((DoubleValue) cj).getDoubleValue();
                final double vj = (dj - meanJ) / stdDevJ;
                nominatorMatrix.add(m_numericColIndexMap[i], m_numericColIndexMap[j], vi * vj);
            }
        }
        valIndex = 0;
        for (int i = 0; i < m_categoricalColIndexMap.length; i++) {
            for (int j = i + 1; j < m_categoricalColIndexMap.length; j++, valIndex++) {
                LinkedHashMap<DataCell, Integer> possibleValuesI = m_possibleValues[i];
                LinkedHashMap<DataCell, Integer> possibleValuesJ = m_possibleValues[j];
                if (possibleValuesI == null || possibleValuesJ == null) {
                    continue;
                }
                DataCell ci = r.getCell(m_categoricalColIndexMap[i]);
                DataCell cj = r.getCell(m_categoricalColIndexMap[j]);
                Integer indexI = possibleValuesI.get(ci);
                Integer indexJ = possibleValuesJ.get(cj);
                assert indexI != null && indexI >= 0 : String.format("Value unknown in value list of column \"%s-\": %s", table.getDataTableSpec().getColumnSpec(m_categoricalColIndexMap[i]).getName(), ci);
                // BUGFIX: report the column-j cell cj here (previously printed ci — copy/paste slip)
                assert indexJ != null && indexJ >= 0 : String.format("Value unknown in value list of column \"%s-\": %s", table.getDataTableSpec().getColumnSpec(m_categoricalColIndexMap[j]).getName(), cj);
                contingencyTables[valIndex][indexI][indexJ]++;
            }
        }
        exec.checkCanceled();
        exec.setProgress(rowIndex / (double) rowCount, String.format("Calculating statistics - %d/%d (\"%s\")", rowIndex, rowCount, r.getKey()));
        rowIndex += 1;
    }
    // normalize the accumulated cross products to sample correlations (divide by n - 1)
    for (int i = 0; i < m_numericColIndexMap.length; i++) {
        for (int j = i + 1; j < m_numericColIndexMap.length; j++) {
            final int trueI = m_numericColIndexMap[i];
            final int trueJ = m_numericColIndexMap[j];
            double t = nominatorMatrix.get(trueI, trueJ);
            if (!Double.isNaN(t)) {
                int validCount = m_numericValidCountMatrix.get(i, j);
                nominatorMatrix.set(trueI, trueJ, t / (validCount - 1));
            }
        }
    }
    // turn each contingency table into a Cramer's V value; NaN where a table was skipped
    valIndex = 0;
    for (int i = 0; i < m_categoricalColIndexMap.length; i++) {
        for (int j = i + 1; j < m_categoricalColIndexMap.length; j++) {
            int[][] contingencyTable = contingencyTables[valIndex];
            double value;
            if (contingencyTable == null) {
                value = Double.NaN;
            } else {
                value = computeCramersV(contingencyTable);
            }
            nominatorMatrix.set(m_categoricalColIndexMap[i], m_categoricalColIndexMap[j], value);
            valIndex++;
        }
    }
    return nominatorMatrix;
}
Example usage of org.knime.base.util.HalfDoubleMatrix in the knime-core project (by KNIME):
class PMCCPortObjectAndSpec, method load.
/**
 * Factory method to load from config.
 *
 * @param m to load from.
 * @return new object loaded from argument
 * @throws InvalidSettingsException If that fails.
 */
public static PMCCPortObjectAndSpec load(final ConfigRO m) throws InvalidSettingsException {
    final ConfigRO internals = m.getConfig(CFG_INTERNAL);
    final String[] columnNames = internals.getStringArray(CFG_NAMES);
    if (columnNames == null) {
        throw new InvalidSettingsException("Column names array is null.");
    }
    // spec-only objects carry no correlation values; return early in that case
    if (!internals.getBoolean(CFG_CONTAINS_VALUES)) {
        return new PMCCPortObjectAndSpec(columnNames);
    }
    final HalfDoubleMatrix corrMatrix = new HalfDoubleMatrix(internals.getConfig(CFG_VALUES));
    return new PMCCPortObjectAndSpec(columnNames, corrMatrix);
}
End of aggregated usage examples.