Search in sources :

Example 6 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class RuleSetToTable method toString.

/**
 * Renders a {@link DataCell} in the textual form used inside rules.
 * <ul>
 * <li>a missing cell becomes the quoted question mark {@code "?"},</li>
 * <li>a {@link StringValue} is returned as its escaped string content,</li>
 * <li>a {@link BooleanValue} becomes {@code TRUE} or {@code FALSE},</li>
 * <li>a {@link DoubleValue} is returned as its unescaped {@code toString()} form,</li>
 * <li>every other cell is returned as its escaped {@code toString()} form.</li>
 * </ul>
 *
 * @param cell A {@link DataCell}.
 * @return The value of {@code cell} as a {@link String}, properly escaped.
 */
public static String toString(final DataCell cell) {
    // The missing check has to come first: it takes precedence over every value interface.
    if (cell.isMissing()) {
        return "\"?\"";
    }
    if (cell instanceof StringValue) {
        return escapedText(((StringValue) cell).getStringValue());
    }
    if (cell instanceof BooleanValue) {
        final boolean flag = ((BooleanValue) cell).getBooleanValue();
        return flag ? "TRUE" : "FALSE";
    }
    // Numbers are emitted verbatim, everything else goes through the escaping.
    final String text = cell.toString();
    return cell instanceof DoubleValue ? text : escapedText(text);
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) BooleanValue(org.knime.core.data.BooleanValue) StringValue(org.knime.core.data.StringValue)

Example 7 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class HistogramColumn method constructFromDataArray.

/**
 * Constructs the helper data structures from the numeric histogram models and the input data.
 * <p>
 * The first element of the result maps a column index to (bin index -> row keys) for every
 * {@link DoubleValue}-compatible column that has a non-{@code null} histogram model. The second element maps a
 * column index to (cell value -> row keys) for every column that either has domain values or is listed in
 * {@code nominalColumnNames}; missing cells are not recorded there.
 *
 * @param histograms The numeric histograms.
 * @param data The input data.
 * @param nominalColumnNames The nominal column names.
 * @return The helper data structures.
 * @see #construct(Map, DataTable, Set)
 */
protected static Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> constructFromDataArray(final Map<Integer, HistogramNumericModel> histograms, final DataTable data, final Set<String> nominalColumnNames) {
    final Map<Integer, Map<Integer, Set<RowKey>>> rowKeysByBin = new HashMap<Integer, Map<Integer, Set<RowKey>>>();
    final Map<Integer, Map<DataValue, Set<RowKey>>> rowKeysByValue = new HashMap<Integer, Map<DataValue, Set<RowKey>>>();
    final DataTableSpec spec = data.getDataTableSpec();

    // Phase 1: decide which columns take part and prepare an empty bucket map for each of them.
    for (DataColumnSpec column : spec) {
        final String columnName = column.getName();
        final Integer index = Integer.valueOf(spec.findColumnIndex(columnName));
        final boolean numeric = column.getType().isCompatible(DoubleValue.class);
        if (numeric && histograms.get(index) != null) {
            rowKeysByBin.put(index, new HashMap<Integer, Set<RowKey>>());
        }
        final boolean nominal = column.getDomain().hasValues() || nominalColumnNames.contains(columnName);
        if (nominal) {
            rowKeysByValue.put(index, new HashMap<DataValue, Set<RowKey>>());
        }
    }

    // Phase 2: a single pass over the rows fills the buckets.
    for (DataRow row : data) {
        for (Entry<Integer, Map<Integer, Set<RowKey>>> numericEntry : rowKeysByBin.entrySet()) {
            final Integer column = numericEntry.getKey();
            final DataCell cell = row.getCell(column.intValue());
            if (!(cell instanceof DoubleValue)) {
                continue;
            }
            final Integer bin = Integer.valueOf(histograms.get(column).findBin((DoubleValue) cell));
            final Map<Integer, Set<RowKey>> bins = numericEntry.getValue();
            Set<RowKey> keys = bins.get(bin);
            if (keys == null) {
                keys = new HashSet<RowKey>();
                bins.put(bin, keys);
            }
            keys.add(row.getKey());
        }
        for (Entry<Integer, Map<DataValue, Set<RowKey>>> nominalEntry : rowKeysByValue.entrySet()) {
            final DataCell cell = row.getCell(nominalEntry.getKey().intValue());
            if (cell.isMissing()) {
                continue;
            }
            final Map<DataValue, Set<RowKey>> values = nominalEntry.getValue();
            Set<RowKey> keys = values.get(cell);
            if (keys == null) {
                keys = new HashSet<RowKey>();
                values.put(cell, keys);
            }
            keys.add(row.getKey());
        }
    }
    return Pair.create(rowKeysByBin, rowKeysByValue);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) Set(java.util.Set) HashSet(java.util.HashSet) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataValue(org.knime.core.data.DataValue) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 8 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class HistogramColumn method loadHistograms.

/**
 * Loads the histograms from the saved internal files and rebuilds the row key lookups from the saved data.
 * <p>
 * NOTE(review): unlike {@code constructFromDataArray}, the nominal lookup built here also records missing cells
 * as keys - confirm whether that difference is intended.
 *
 * @param histogramsGz The file for the histograms.
 * @param dataArrayGz The data array file for the row keys.
 * @param nominalColumns The nominal columns; names that are not found in the saved table are skipped.
 * @param strategy The strategy used to compute the bins.
 * @param means The mean values for the numeric columns.
 * @return A triple (Pair(Pair(,),)) of histograms, numeric and nominal row keys.
 * @throws IOException Failed to read the files.
 * @throws InvalidSettingsException Something went wrong.
 */
public static Pair<Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> loadHistograms(final File histogramsGz, final File dataArrayGz, final Set<String> nominalColumns, final BinNumberSelectionStrategy strategy, final double[] means) throws IOException, InvalidSettingsException {
    // numericKeys is handed to the private loader; its key set drives the numeric pass below.
    final Map<Integer, Map<Integer, Set<RowKey>>> numericKeys = new HashMap<Integer, Map<Integer, Set<RowKey>>>();
    final Map<Integer, HistogramNumericModel> histograms = loadHistogramsPrivate(histogramsGz, numericKeys, strategy, means);
    final Map<Integer, Map<DataValue, Set<RowKey>>> nominalKeys = new HashMap<Integer, Map<DataValue, Set<RowKey>>>();
    final ContainerTable table = DataContainer.readFromZip(dataArrayGz);
    final DataTableSpec spec = table.getDataTableSpec();

    for (String name : nominalColumns) {
        final int index = spec.findColumnIndex(name);
        if (index >= 0) {
            nominalKeys.put(Integer.valueOf(index), new HashMap<DataValue, Set<RowKey>>());
        }
    }

    for (DataRow row : table) {
        for (Entry<Integer, Map<Integer, Set<RowKey>>> numericEntry : numericKeys.entrySet()) {
            final Integer column = numericEntry.getKey();
            final DataCell cell = row.getCell(column.intValue());
            if (cell.isMissing() || !(cell instanceof DoubleValue)) {
                continue;
            }
            final HistogramNumericModel model = histograms.get(column);
            final Integer bin = Integer.valueOf(model.findBin((DoubleValue) cell));
            final Map<Integer, Set<RowKey>> bins = numericEntry.getValue();
            Set<RowKey> keys = bins.get(bin);
            if (keys == null) {
                keys = new HashSet<RowKey>();
                bins.put(bin, keys);
            }
            keys.add(row.getKey());
        }
        for (Entry<Integer, Map<DataValue, Set<RowKey>>> nominalEntry : nominalKeys.entrySet()) {
            final DataCell cell = row.getCell(nominalEntry.getKey().intValue());
            final Map<DataValue, Set<RowKey>> values = nominalEntry.getValue();
            Set<RowKey> keys = values.get(cell);
            if (keys == null) {
                keys = new HashSet<RowKey>();
                values.put(cell, keys);
            }
            keys.add(row.getKey());
        }
    }

    final Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>> numericPart = new Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>>(histograms, numericKeys);
    return Pair.create(numericPart, nominalKeys);
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataValue(org.knime.core.data.DataValue) DataRow(org.knime.core.data.DataRow) ContainerTable(org.knime.core.data.container.ContainerTable) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 9 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class SortedCorrelationComputer method calculateKendall.

/**
 * Calculates a rank correlation measure (Kendall's tau-a, Kendall's tau-b or Kruskal's gamma, selected by
 * {@code corrType}) for all pairs of data table columns based on the previously calculated ranks.
 * <p>
 * For every column pair (i, j) with i &lt; j, each unordered pair of rows is classified exactly once as
 * concordant, discordant, tied only in y or tied only in x. Pairs tied in both columns are not counted because
 * none of the supported measures needs them. If the denominator of a measure is zero, the stored value is not a
 * finite number. For an unknown {@code corrType} the matrix is returned without any value being set.
 *
 * @param corrType the type of correlation used, as defined in CorrelationComputeNodeModel
 * @param exec the execution monitor used for progress reporting and cancellation
 * @return the output matrix to be turned into the output model
 * @throws CanceledExecutionException if canceled by users
 */
HalfDoubleMatrix calculateKendall(final String corrType, final ExecutionMonitor exec) throws CanceledExecutionException {
    // the ranking must have been calculated before
    assert (m_rank != null);
    final int coCount = m_rank.getDataTableSpec().getNumColumns();
    final HalfDoubleMatrix nominatorMatrix = new HalfDoubleMatrix(coCount, /*includeDiagonal=*/ false);
    // Only the upper triangle (j > i) of these count matrices is filled and read.
    final double[][] cMatrix = new double[coCount][coCount];
    final double[][] dMatrix = new double[coCount][coCount];
    final double[][] txMatrix = new double[coCount][coCount];
    final double[][] tyMatrix = new double[coCount][coCount];
    final double[] values = new double[coCount];
    final double[] values2 = new double[coCount];
    int rowIndex = 0;
    final int rowCount = m_rank.getRowCount();
    for (DataRow r : m_rank) {
        // every value of the row is accessed multiple times, so we buffer it
        for (int i = 0; i < coCount; i++) {
            values[i] = ((DoubleValue) r.getCell(i)).getDoubleValue();
        }
        for (DataRow r2 : m_rank) {
            exec.checkCanceled();
            // every value of the row is accessed multiple times, so we buffer it
            for (int i = 0; i < coCount; i++) {
                values2[i] = ((DoubleValue) r2.getCell(i)).getDoubleValue();
            }
            for (int i = 0; i < coCount; i++) {
                final double x1 = values[i];
                final double x2 = values2[i];
                for (int j = i + 1; j < coCount; j++) {
                    final double y1 = values[j];
                    final double y2 = values2[j];
                    // Both orderings (r, r2) and (r2, r) are visited, so every branch insists on a strict
                    // "<" in one column. That way each unordered row pair contributes to exactly one
                    // counter exactly once; counting ties with "!=" would count them twice.
                    if (x1 < x2) {
                        if (y1 < y2) {
                            // values are concordant
                            cMatrix[i][j]++;
                        } else if (y1 > y2) {
                            // values are discordant
                            dMatrix[i][j]++;
                        } else if (y1 == y2) {
                            // values are tied in y only
                            tyMatrix[i][j]++;
                        }
                    } else if (x1 == x2 && y1 < y2) {
                        // values are tied in x only
                        txMatrix[i][j]++;
                    }
                    // x1 == x2 && y1 == y2: tied in x and y, no measure needs this count
                }
            }
        }
        exec.checkCanceled();
        exec.setProgress(0.95 * rowIndex / rowCount, String.format("Calculating - %d/%d (\"%s\")", rowIndex, rowCount, r.getKey()));
        rowIndex++;
    }
    // The counting above accounts for the first 95% of the progress; the remaining 5% are reported on top of
    // that so the progress never moves backwards.
    if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLA)) {
        final double nrOfRows = rowCount;
        // kendalls Tau a: (C - D) / (n * (n - 1) / 2)
        final double divisor = (nrOfRows * (nrOfRows - 1.0)) * 0.5;
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / divisor);
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLB)) {
        // kendalls Tau b: (C - D) / sqrt((C + D + Tx) * (C + D + Ty))
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                final double untied = cMatrix[i][j] + dMatrix[i][j];
                final double div = Math.sqrt(untied + txMatrix[i][j]) * Math.sqrt(untied + tyMatrix[i][j]);
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / div);
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KRUSKALAL)) {
        // Kruskals Gamma: (C - D) / (C + D)
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / (cMatrix[i][j] + dMatrix[i][j]));
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    }
    return nominatorMatrix;
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) HalfDoubleMatrix(org.knime.base.util.HalfDoubleMatrix) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Example 10 with DoubleValue

use of org.knime.core.data.DoubleValue in project knime-core by knime.

the class TwoSampleTTest method execute.

/**
 * Collects the two-sample t-test statistics for every configured test column in a single pass over the table.
 * <p>
 * For each row the group is looked up from the grouping column. Rows whose group cannot be resolved are
 * registered on every statistic as a missing group (grouping cell is missing) or as an ignored group
 * (any other value). Otherwise each test column contributes either its numeric value or a missing value to the
 * row's group.
 *
 * @param table the input table
 * @param exec used for progress reporting and cancellation
 * @return one statistics object per test column, in the order of the configured test columns
 * @throws InvalidSettingsException if the grouping column or one of the test columns is not part of the table
 * @throws CanceledExecutionException if the execution is canceled
 */
public TwoSampleTTestStatistics[] execute(final BufferedDataTable table, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    final DataTableSpec spec = table.getDataTableSpec();
    final int groupingIndex = spec.findColumnIndex(m_grouping.getColumn());
    if (groupingIndex < 0) {
        throw new InvalidSettingsException("Grouping column not found.");
    }
    final int testColumnCount = m_testColumns.length;
    final int[] testColumnsIndex = new int[testColumnCount];
    final TwoSampleTTestStatistics[] result = new TwoSampleTTestStatistics[testColumnCount];
    for (int i = 0; i < testColumnCount; i++) {
        testColumnsIndex[i] = spec.findColumnIndex(m_testColumns[i]);
        // Fail early with a settings error instead of an index error on the first row access.
        if (testColumnsIndex[i] < 0) {
            throw new InvalidSettingsException("Test column \"" + m_testColumns[i] + "\" not found.");
        }
        result[i] = new TwoSampleTTestStatistics(m_testColumns[i], m_grouping.getGroupLabels(), m_confidenceIntervalProb);
    }
    final int rowCount = table.getRowCount();
    int rowIndex = 0;
    for (DataRow row : table) {
        exec.checkCanceled();
        // The fraction is based on the rows finished so far, the message shows the 1-based current row.
        final double progress = rowIndex / (double) rowCount;
        rowIndex++;
        exec.setProgress(progress, rowIndex + "/" + rowCount + " (\"" + row.getKey() + "\")");
        final DataCell groupCell = row.getCell(groupingIndex);
        final Group group = m_grouping.getGroup(groupCell);
        if (group == null) {
            // The row belongs to none of the two groups: only record why it was skipped.
            final boolean missingGroup = groupCell.isMissing();
            for (int i = 0; i < testColumnCount; i++) {
                if (missingGroup) {
                    result[i].addMissingGroup();
                } else {
                    result[i].addIgnoredGroup();
                }
            }
            continue;
        }
        for (int i = 0; i < testColumnCount; i++) {
            final DataCell cell = row.getCell(testColumnsIndex[i]);
            if (cell.isMissing()) {
                result[i].addMissing(group);
            } else {
                result[i].addValue(((DoubleValue) cell).getDoubleValue(), group);
            }
        }
    }
    return result;
}
Also used : Group(org.knime.base.node.stats.testing.ttest.Grouping.Group) DataTableSpec(org.knime.core.data.DataTableSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Aggregations

DoubleValue (org.knime.core.data.DoubleValue)154 DataCell (org.knime.core.data.DataCell)103 DataRow (org.knime.core.data.DataRow)71 DataColumnSpec (org.knime.core.data.DataColumnSpec)38 DataTableSpec (org.knime.core.data.DataTableSpec)38 DoubleCell (org.knime.core.data.def.DoubleCell)32 ArrayList (java.util.ArrayList)26 BufferedDataTable (org.knime.core.node.BufferedDataTable)26 DataType (org.knime.core.data.DataType)23 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)21 LinkedHashMap (java.util.LinkedHashMap)18 IntValue (org.knime.core.data.IntValue)15 HashMap (java.util.HashMap)14 RowIterator (org.knime.core.data.RowIterator)14 RowKey (org.knime.core.data.RowKey)13 DefaultRow (org.knime.core.data.def.DefaultRow)13 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)12 LongValue (org.knime.core.data.LongValue)10 StringValue (org.knime.core.data.StringValue)10 DateAndTimeValue (org.knime.core.data.date.DateAndTimeValue)10