Search in sources :

Example 1 with HalfDoubleMatrix

use of org.knime.base.util.HalfDoubleMatrix in project knime-core by knime.

the class SortedCorrelationComputer method calculateKendall.

/**
 * Calculates a Kendall-type rank correlation for all pairs of data table columns based on the previously
 * calculated ranks.
 *
 * <p>The rank table is scanned in a nested loop over all ordered row pairs. For every column pair (i, j) with
 * i &lt; j the number of concordant, discordant, y-only-tied and x-only-tied row pairs is counted; each
 * unordered row pair contributes to exactly one counter at most. Depending on {@code corrType} these counts are
 * then combined into Kendall's tau a, Kendall's tau b or Kruskal's gamma.
 *
 * @param corrType the type of correlation used, one of the {@code CFG_KENDALLA}, {@code CFG_KENDALLB} or
 *            {@code CFG_KRUSKALAL} constants of RankCorrelationComputeNodeModel
 * @param exec the execution monitor used for progress reporting and cancelation checks
 * @return the output matrix to be turned into the output model
 * @throws CanceledExecutionException if canceled by users
 */
HalfDoubleMatrix calculateKendall(final String corrType, final ExecutionMonitor exec) throws CanceledExecutionException {
    // the ranking must have been calculated before
    assert (m_rank != null);
    final int coCount = m_rank.getDataTableSpec().getNumColumns();
    HalfDoubleMatrix nominatorMatrix = new HalfDoubleMatrix(coCount, /*includeDiagonal=*/false);
    // pair counters; only the entries with i < j are filled and read
    double[][] cMatrix = new double[coCount][coCount];
    double[][] dMatrix = new double[coCount][coCount];
    double[][] txMatrix = new double[coCount][coCount];
    double[][] tyMatrix = new double[coCount][coCount];
    // the values of a row are accessed multiple times, so we buffer them
    final double[] values1 = new double[coCount];
    final double[] values2 = new double[coCount];
    int rowIndex = 0;
    final int rowCount = m_rank.getRowCount();
    for (DataRow r : m_rank) {
        for (int i = 0; i < coCount; i++) {
            values1[i] = ((DoubleValue) r.getCell(i)).getDoubleValue();
        }
        for (DataRow r2 : m_rank) {
            exec.checkCanceled();
            for (int i = 0; i < coCount; i++) {
                values2[i] = ((DoubleValue) r2.getCell(i)).getDoubleValue();
            }
            for (int i = 0; i < coCount; i++) {
                final double x1 = values1[i];
                final double x2 = values2[i];
                // the result matrix has no diagonal and is symmetric: only j > i is needed
                for (int j = i + 1; j < coCount; j++) {
                    final double y1 = values1[j];
                    final double y2 = values2[j];
                    // Every unordered row pair is visited twice, as (r, r2) and as (r2, r). All branches
                    // therefore use a strict "<" on one coordinate so that each pair is counted only once.
                    if (x1 < x2 && y1 < y2) {
                        // values are concordant
                        cMatrix[i][j]++;
                    } else if (x1 < x2 && y1 > y2) {
                        // values are discordant
                        dMatrix[i][j]++;
                    } else if (x1 < x2 && y1 == y2) {
                        // values are tied in y only
                        tyMatrix[i][j]++;
                    } else if (x1 == x2 && y1 < y2) {
                        // values are tied in x only
                        txMatrix[i][j]++;
                    }
                    // pairs tied in both x and y are not needed by any of the measures
                }
            }
        }
        exec.checkCanceled();
        exec.setProgress(0.95 * rowIndex / rowCount, String.format("Calculating - %d/%d (\"%s\")", rowIndex, rowCount, r.getKey()));
        rowIndex++;
    }
    // The counting phase above used the first 95% of the progress; the remaining 5% are reported on top of
    // that so that the progress never jumps backwards.
    if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLA)) {
        double nrOfRows = rowCount;
        // kendalls Tau a: normalize by the total number of row pairs n * (n - 1) / 2
        double divisor = (nrOfRows * (nrOfRows - 1.0)) * 0.5;
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / divisor);
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLB)) {
        // kendalls Tau b: normalize by the geometric mean of the pairs not tied in x and not tied in y
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                double div = Math.sqrt(cMatrix[i][j] + dMatrix[i][j] + txMatrix[i][j]) * Math.sqrt(cMatrix[i][j] + dMatrix[i][j] + tyMatrix[i][j]);
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / div);
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KRUSKALAL)) {
        // Kruskals Gamma: ties are ignored completely
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / (cMatrix[i][j] + dMatrix[i][j]));
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    }
    // NOTE(review): for any other corrType the matrix is returned with its initial content - confirm intended
    return nominatorMatrix;
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) HalfDoubleMatrix(org.knime.base.util.HalfDoubleMatrix) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Example 2 with HalfDoubleMatrix

use of org.knime.base.util.HalfDoubleMatrix in project knime-core by knime.

the class StandCronbachNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * Reads the correlation matrix from the model at port 0, averages all off-diagonal entries and writes
 * {@code k * mean / (1 + (k - 1) * mean)} (with {@code k} being the matrix dimension) as a single row
 * named "Cronbach" to the output table.
 *
 * @throws IOException if the correlation matrix contains a NaN entry
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    PMCCPortObjectAndSpec model = (PMCCPortObjectAndSpec) inData[0];
    HalfDoubleMatrix mat = model.getCorrelationMatrix();
    final int dimension = mat.getRowCount();
    double sum = 0;
    double count = 0;
    // sum up the upper triangle (i < j), i.e. every pair-wise correlation exactly once
    for (int i = 0; i < dimension; i++) {
        for (int j = i + 1; j < dimension; j++) {
            final double correlation = mat.get(i, j);
            if (Double.isNaN(correlation)) {
                throw new IOException("No NaN values supported for the calculation, " + "try using an alternative correlation measure");
            }
            sum += correlation;
            count++;
        }
    }
    // NOTE(review): with fewer than two columns count is 0 and the result becomes NaN - confirm intended
    double mean = sum / count;
    double cronbach = (dimension * mean) / (1 + (dimension - 1) * mean);
    BufferedDataContainer out = exec.createDataContainer(getDataTableSpec());
    RowKey k = new RowKey("Cronbach");
    DataRow r = new DefaultRow(k, new DoubleCell(cronbach));
    out.addRowToTable(r);
    out.close();
    return new BufferedDataTable[] { out.getTable() };
}
Also used : PMCCPortObjectAndSpec(org.knime.base.node.preproc.correlation.pmcc.PMCCPortObjectAndSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) HalfDoubleMatrix(org.knime.base.util.HalfDoubleMatrix) BufferedDataTable(org.knime.core.node.BufferedDataTable) IOException(java.io.IOException) DefaultRow(org.knime.core.data.def.DefaultRow) DataRow(org.knime.core.data.DataRow)

Example 3 with HalfDoubleMatrix

use of org.knime.base.util.HalfDoubleMatrix in project knime-core by knime.

the class CorrelationComputeNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // reduce the input table to the columns selected by the column filter
    final BufferedDataTable inputTable = (BufferedDataTable) inData[0];
    final DataTableSpec inputSpec = inputTable.getDataTableSpec();
    final ColumnRearranger keepIncluded = new ColumnRearranger(inputSpec);
    final String[] includeNames = m_columnFilterModel.applyTo(inputSpec).getIncludes();
    keepIncluded.keepOnly(includeNames);
    final BufferedDataTable filteredTable =
        exec.createColumnRearrangeTable(inputTable, keepIncluded, exec.createSilentSubExecutionContext(0.0));

    // progress shares of the two data scans; the remainder is used for assembling the output
    final double statisticsShare = 0.48;
    final double correlationShare = 0.48;
    final double assemblyShare = 1.0 - statisticsShare - correlationShare;

    final CorrelationComputer calculator =
        new CorrelationComputer(filteredTable.getDataTableSpec(), m_maxPossValueCountModel.getIntValue());

    // first scan: table statistics
    exec.setMessage("Calculating table statistics");
    final ExecutionContext statisticsExec = exec.createSubExecutionContext(statisticsShare);
    calculator.calculateStatistics(filteredTable, statisticsExec);
    statisticsExec.setProgress(1.0);

    // second scan: correlation values
    exec.setMessage("Calculating correlation values");
    final ExecutionMonitor correlationExec = exec.createSubExecutionContext(correlationShare);
    final HalfDoubleMatrix correlationMatrix = calculator.calculateOutput(filteredTable, correlationExec);
    correlationExec.setProgress(1.0);

    // turn the matrix into the output model and table
    exec.setMessage("Assembling output");
    final ExecutionContext assemblyExec = exec.createSubExecutionContext(assemblyShare);
    final PMCCPortObjectAndSpec pmccModel = new PMCCPortObjectAndSpec(includeNames, correlationMatrix);
    final BufferedDataTable correlationTable = pmccModel.createCorrelationMatrix(assemblyExec);
    m_correlationTable = correlationTable;

    // short versions of the messages go into the node warning, long versions into the debug log
    String warningText = calculator.getNumericMissingValueWarning(4);
    if (warningText != null) {
        LOGGER.debug(calculator.getNumericMissingValueWarning(1000));
    }
    final String constantColumnsText = calculator.getNumericConstantColumnPairs(4);
    if (constantColumnsText != null) {
        LOGGER.debug(calculator.getNumericConstantColumnPairs(1000));
        warningText = (warningText == null) ? constantColumnsText : warningText + "\n" + constantColumnsText;
    }
    if (warningText != null) {
        setWarningMessage(warningText);
    }
    return new PortObject[] { correlationTable, pmccModel };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMCCPortObjectAndSpec(org.knime.base.node.preproc.correlation.pmcc.PMCCPortObjectAndSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) ExecutionContext(org.knime.core.node.ExecutionContext) HalfDoubleMatrix(org.knime.base.util.HalfDoubleMatrix) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) PortObject(org.knime.core.node.port.PortObject)

Example 4 with HalfDoubleMatrix

use of org.knime.base.util.HalfDoubleMatrix in project knime-core by knime.

the class CorrelationComputer method calculateOutput.

/**
 * Second scan on data. Computes the pair wise correlation for numeric
 * columns and reads the contingency tables of pairs of categorical
 * columns into memory.
 *
 * <p>The returned matrix is initialized with NaN. Numeric pairs accumulate the
 * product of their standardized values per row and are finally divided by
 * (valid count - 1); pairs involving a constant column stay NaN and are
 * recorded in m_numericsWithConstantValues. Categorical pairs receive the
 * result of computeCramersV on their contingency table, or NaN if no
 * contingency table was allocated for the pair.
 *
 * @param table the table to scan; its spec must have the same structure as
 *            the spec this computer was created with (checked by assertion)
 * @param exec monitor used for progress reporting and cancelation checks
 * @return the output matrix to be turned into the output model
 * @throws CanceledExecutionException if the execution was canceled
 */
public HalfDoubleMatrix calculateOutput(final BufferedDataTable table, final ExecutionMonitor exec) throws CanceledExecutionException {
    assert table.getDataTableSpec().equalStructure(m_tableSpec);
    int catCount = m_categoricalColIndexMap.length;
    int categoricalPairsCount = (catCount - 1) * catCount / 2;
    // stores all pair-wise contingency tables,
    // contingencyTables[i] == null <--> either column of the corresponding
    // pair has more than m_maxPossibleValues values
    // http://en.wikipedia.org/wiki/Contingency_table
    int[][][] contingencyTables = new int[categoricalPairsCount][][];
    int valIndex = 0;
    for (int i = 0; i < m_categoricalColIndexMap.length; i++) {
        for (int j = i + 1; j < m_categoricalColIndexMap.length; j++) {
            LinkedHashMap<DataCell, Integer> valuesI = m_possibleValues[i];
            LinkedHashMap<DataCell, Integer> valuesJ = m_possibleValues[j];
            if (valuesI != null && valuesJ != null) {
                int iSize = valuesI.size();
                int jSize = valuesJ.size();
                contingencyTables[valIndex] = new int[iSize][jSize];
            }
            valIndex++;
        }
    }
    final int numColumns = m_tableSpec.getNumColumns();
    HalfDoubleMatrix nominatorMatrix = new HalfDoubleMatrix(numColumns, /*includeDiagonal=*/false);
    nominatorMatrix.fill(Double.NaN);
    long rowIndex = 0;
    DataCell[] cells = new DataCell[numColumns];
    final long rowCount = table.size();
    // prepare the numeric pairs: 0.0 for pairs that will be accumulated below,
    // NaN for pairs with a constant column
    for (int i = 0; i < m_numericColIndexMap.length; i++) {
        final double stdDevI = m_numericStdDevMatrix[i][i];
        if (stdDevI == 0.0) {
            for (int j = i + 1; j < m_numericColIndexMap.length; j++) {
                nominatorMatrix.set(m_numericColIndexMap[i], m_numericColIndexMap[j], Double.NaN);
            }
            m_numericsWithConstantValues.add(new Pair<Integer, Integer>(m_numericColIndexMap[i], null));
        } else {
            for (int j = i + 1; j < m_numericColIndexMap.length; j++) {
                nominatorMatrix.set(m_numericColIndexMap[i], m_numericColIndexMap[j], 0.0);
                final double stdDevJ = m_numericStdDevMatrix[j][j];
                if (stdDevJ == 0.0) {
                    nominatorMatrix.set(m_numericColIndexMap[i], m_numericColIndexMap[j], Double.NaN);
                // rest is fixed when j becomes the current value
                // in the outer loop
                } else {
                    double stdDevIUnderJ = m_numericStdDevMatrix[i][j];
                    double stdDevJUnderI = m_numericStdDevMatrix[j][i];
                    if (stdDevIUnderJ == 0.0) {
                        // all values in column i where j is not missing
                        // are constant
                        m_numericsWithConstantValues.add(new Pair<Integer, Integer>(m_numericColIndexMap[i], m_numericColIndexMap[j]));
                        nominatorMatrix.set(m_numericColIndexMap[i], m_numericColIndexMap[j], Double.NaN);
                    }
                    if (stdDevJUnderI == 0.0) {
                        // all values in column j where i is not missing
                        // are constant
                        m_numericsWithConstantValues.add(new Pair<Integer, Integer>(m_numericColIndexMap[j], m_numericColIndexMap[i]));
                        nominatorMatrix.set(m_numericColIndexMap[i], m_numericColIndexMap[j], Double.NaN);
                    }
                }
            }
        }
    }
    for (DataRow r : table) {
        // cells are accessed multiple times, so we buffer them
        for (int i = 0; i < cells.length; i++) {
            cells[i] = r.getCell(i);
        }
        // numeric pairs: add the product of the standardized values
        for (int i = 0; i < m_numericColIndexMap.length; i++) {
            final DataCell ci = cells[m_numericColIndexMap[i]];
            if (ci.isMissing()) {
                continue;
            }
            if (m_numericStdDevMatrix[i][i] == 0.0) {
                // constant column, reported above
                continue;
            }
            final double di = ((DoubleValue) ci).getDoubleValue();
            for (int j = i + 1; j < m_numericColIndexMap.length; j++) {
                final DataCell cj = cells[m_numericColIndexMap[j]];
                if (cj.isMissing()) {
                    continue;
                }
                final double meanI = m_numericMeanMatrix[i][j];
                final double stdDevI = m_numericStdDevMatrix[i][j];
                final double meanJ = m_numericMeanMatrix[j][i];
                final double stdDevJ = m_numericStdDevMatrix[j][i];
                if (stdDevI == 0.0 || stdDevJ == 0.0) {
                    // reported above
                    continue;
                }
                final double vi = (di - meanI) / stdDevI;
                final double dj = ((DoubleValue) cj).getDoubleValue();
                final double vj = (dj - meanJ) / stdDevJ;
                nominatorMatrix.add(m_numericColIndexMap[i], m_numericColIndexMap[j], vi * vj);
            }
        }
        // categorical pairs: count the value combination of this row
        valIndex = 0;
        for (int i = 0; i < m_categoricalColIndexMap.length; i++) {
            for (int j = i + 1; j < m_categoricalColIndexMap.length; j++, valIndex++) {
                LinkedHashMap<DataCell, Integer> possibleValuesI = m_possibleValues[i];
                LinkedHashMap<DataCell, Integer> possibleValuesJ = m_possibleValues[j];
                if (possibleValuesI == null || possibleValuesJ == null) {
                    continue;
                }
                DataCell ci = r.getCell(m_categoricalColIndexMap[i]);
                DataCell cj = r.getCell(m_categoricalColIndexMap[j]);
                Integer indexI = possibleValuesI.get(ci);
                Integer indexJ = possibleValuesJ.get(cj);
                // NOTE(review): with assertions disabled an unknown value would fail with a
                // NullPointerException on unboxing below - confirm the value lists are complete
                assert indexI != null && indexI >= 0 : String.format("Value unknown in value list of column \"%s-\": %s", table.getDataTableSpec().getColumnSpec(m_categoricalColIndexMap[i]).getName(), ci);
                assert indexJ != null && indexJ >= 0 : String.format("Value unknown in value list of column \"%s-\": %s", table.getDataTableSpec().getColumnSpec(m_categoricalColIndexMap[j]).getName(), cj);
                contingencyTables[valIndex][indexI][indexJ]++;
            }
        }
        exec.checkCanceled();
        exec.setProgress(rowIndex / (double) rowCount, String.format("Calculating statistics - %d/%d (\"%s\")", rowIndex, rowCount, r.getKey()));
        rowIndex += 1;
    }
    // numeric pairs: turn the accumulated sums into the final values
    for (int i = 0; i < m_numericColIndexMap.length; i++) {
        for (int j = i + 1; j < m_numericColIndexMap.length; j++) {
            final int trueI = m_numericColIndexMap[i];
            final int trueJ = m_numericColIndexMap[j];
            double t = nominatorMatrix.get(trueI, trueJ);
            if (!Double.isNaN(t)) {
                int validCount = m_numericValidCountMatrix.get(i, j);
                nominatorMatrix.set(trueI, trueJ, t / (validCount - 1));
            }
        }
    }
    // categorical pairs: evaluate the contingency tables
    valIndex = 0;
    for (int i = 0; i < m_categoricalColIndexMap.length; i++) {
        for (int j = i + 1; j < m_categoricalColIndexMap.length; j++) {
            int[][] contingencyTable = contingencyTables[valIndex];
            double value;
            if (contingencyTable == null) {
                value = Double.NaN;
            } else {
                value = computeCramersV(contingencyTable);
            }
            nominatorMatrix.set(m_categoricalColIndexMap[i], m_categoricalColIndexMap[j], value);
            valIndex++;
        }
    }
    return nominatorMatrix;
}
Also used : DataRow(org.knime.core.data.DataRow) DoubleValue(org.knime.core.data.DoubleValue) HalfDoubleMatrix(org.knime.base.util.HalfDoubleMatrix) DataCell(org.knime.core.data.DataCell)

Example 5 with HalfDoubleMatrix

use of org.knime.base.util.HalfDoubleMatrix in project knime-core by knime.

the class PMCCPortObjectAndSpec method load.

/**
 * Factory method that restores an instance from a config object.
 * @param m the config to read the internal settings from.
 * @return a new object, with a correlation matrix if the config contains values, without one otherwise
 * @throws InvalidSettingsException If reading fails or the column names array is null.
 */
public static PMCCPortObjectAndSpec load(final ConfigRO m) throws InvalidSettingsException {
    final ConfigRO internal = m.getConfig(CFG_INTERNAL);
    final String[] columnNames = internal.getStringArray(CFG_NAMES);
    if (columnNames == null) {
        throw new InvalidSettingsException("Column names array is null.");
    }
    // spec-only object: no correlation values were stored
    if (!internal.getBoolean(CFG_CONTAINS_VALUES)) {
        return new PMCCPortObjectAndSpec(columnNames);
    }
    final ConfigRO valuesConfig = internal.getConfig(CFG_VALUES);
    return new PMCCPortObjectAndSpec(columnNames, new HalfDoubleMatrix(valuesConfig));
}
Also used : InvalidSettingsException(org.knime.core.node.InvalidSettingsException) HalfDoubleMatrix(org.knime.base.util.HalfDoubleMatrix) ConfigRO(org.knime.core.node.config.ConfigRO)

Aggregations

HalfDoubleMatrix (org.knime.base.util.HalfDoubleMatrix)8 DataRow (org.knime.core.data.DataRow)5 BufferedDataTable (org.knime.core.node.BufferedDataTable)4 PMCCPortObjectAndSpec (org.knime.base.node.preproc.correlation.pmcc.PMCCPortObjectAndSpec)3 DataCell (org.knime.core.data.DataCell)3 DataTableSpec (org.knime.core.data.DataTableSpec)3 DoubleValue (org.knime.core.data.DoubleValue)3 ExecutionContext (org.knime.core.node.ExecutionContext)3 PortObject (org.knime.core.node.port.PortObject)3 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)2 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)2 IOException (java.io.IOException)1 LinkedHashMap (java.util.LinkedHashMap)1 LinkedHashSet (java.util.LinkedHashSet)1 Map (java.util.Map)1 FilterColumnTable (org.knime.base.data.filter.column.FilterColumnTable)1 Normalizer (org.knime.base.data.normalize.Normalizer)1 StatisticsTable (org.knime.base.data.statistics.StatisticsTable)1 DataColumnSpec (org.knime.core.data.DataColumnSpec)1 DataTable (org.knime.core.data.DataTable)1