Search in sources :

Example 26 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class HistogramColumn method createColumnRearranger.

/**
 * Builds a {@link ColumnRearranger} over the statistics table that appends a single histogram column.
 *
 * <p>For every row of {@code stats} the row key is interpreted as a column name of {@code data}. A histogram
 * image cell is produced when that name is listed in {@code columns} and a histogram model is registered for
 * the matching column index; in every other case the appended cell is a missing cell.
 *
 * @param data The input data table that contains the columns referred to by the keys of {@code histograms}.
 * @param stats The statistics table to be extended; its row keys are looked up as column names in {@code data}.
 * @param histograms The histogram models, keyed by column index in {@code data}.
 * @param maxBinCount Not read by this method.
 * @param columns The names of the columns to be described.
 * @return The {@link ColumnRearranger} with the histogram column appended.
 */
ColumnRearranger createColumnRearranger(final BufferedDataTable data, final BufferedDataTable stats, final Map<Integer, HistogramNumericModel> histograms, final int maxBinCount, final String... columns) {
    final ColumnRearranger rearranger = new ColumnRearranger(stats.getDataTableSpec());
    final DataColumnSpec histogramSpec = createHistogramColumnSpec();
    // Work on a sorted private copy so that membership can be tested with a binary search
    // without reordering the caller's array.
    final String[] sortedColumns = columns.clone();
    Arrays.sort(sortedColumns);
    final SingleCellFactory histogramFactory = new SingleCellFactory(true, histogramSpec) {

        @Override
        public DataCell getCell(final DataRow row) {
            final String columnName = row.getKey().getString();
            final boolean requested = Arrays.binarySearch(sortedColumns, columnName) >= 0;
            if (!requested) {
                return DataType.getMissingCell();
            }
            final int columnIndex = data.getSpec().findColumnIndex(columnName);
            final HistogramNumericModel model = histograms.get(Integer.valueOf(columnIndex));
            if (model != null) {
                assert columnIndex == model.getColIndex() : "Expected: " + columnIndex + ", but got: " + model.getColIndex();
                return createImageCell(model, false);
            }
            // No model registered for this column (the original code attributes this to "wrong bounds").
            return DataType.getMissingCell();
        }
    };
    rearranger.append(histogramFactory);
    return rearranger;
}
Also used : ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) DataRow(org.knime.core.data.DataRow)

Example 27 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class HistogramColumn method constructFromDataArray.

/**
 * Constructs the helper data structures from the numeric histogram models and the input data.
 *
 * <p>The first element of the result maps a column index to a map from bin index to the keys of the rows
 * falling into that bin. It only contains columns that are compatible with {@link DoubleValue} and have a
 * non-{@code null} entry in {@code histograms}. The second element maps a column index to a map from cell
 * value to the keys of the rows holding that value. It contains the columns whose domain lists possible
 * values or whose name is in {@code nominalColumnNames}; missing cells are not recorded there.
 *
 * @param histograms The numeric histograms, keyed by column index.
 * @param data The input data.
 * @param nominalColumnNames The nominal column names.
 * @return The helper data structures (numeric mapping, nominal mapping).
 * @see #construct(Map, DataTable, Set)
 */
protected static Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> constructFromDataArray(final Map<Integer, HistogramNumericModel> histograms, final DataTable data, final Set<String> nominalColumnNames) {
    final Map<Integer, Map<Integer, Set<RowKey>>> rowsByBin = new HashMap<Integer, Map<Integer, Set<RowKey>>>();
    final Map<Integer, Map<DataValue, Set<RowKey>>> rowsByValue = new HashMap<Integer, Map<DataValue, Set<RowKey>>>();
    final DataTableSpec spec = data.getDataTableSpec();

    // First pass over the columns: decide which ones are tracked and create their (still empty) maps.
    for (final DataColumnSpec column : spec) {
        final String name = column.getName();
        final Integer index = Integer.valueOf(spec.findColumnIndex(name));
        final boolean numeric = column.getType().isCompatible(DoubleValue.class);
        if (numeric && histograms.get(index) != null) {
            rowsByBin.put(index, new HashMap<Integer, Set<RowKey>>());
        }
        final boolean nominal = column.getDomain().hasValues() || nominalColumnNames.contains(name);
        if (nominal) {
            rowsByValue.put(index, new HashMap<DataValue, Set<RowKey>>());
        }
    }

    // Second pass over the rows: file each row key under its bin / its value.
    for (final DataRow row : data) {
        final RowKey rowKey = row.getKey();
        for (final Entry<Integer, Map<Integer, Set<RowKey>>> numericEntry : rowsByBin.entrySet()) {
            final Integer column = numericEntry.getKey();
            final DataCell cell = row.getCell(column.intValue());
            if (!(cell instanceof DoubleValue)) {
                continue;
            }
            final Integer bin = Integer.valueOf(histograms.get(column).findBin((DoubleValue) cell));
            final Map<Integer, Set<RowKey>> bins = numericEntry.getValue();
            Set<RowKey> members = bins.get(bin);
            if (members == null) {
                members = new HashSet<RowKey>();
                bins.put(bin, members);
            }
            members.add(rowKey);
        }
        for (final Entry<Integer, Map<DataValue, Set<RowKey>>> nominalEntry : rowsByValue.entrySet()) {
            final DataCell cell = row.getCell(nominalEntry.getKey().intValue());
            if (cell.isMissing()) {
                continue;
            }
            final Map<DataValue, Set<RowKey>> values = nominalEntry.getValue();
            Set<RowKey> members = values.get(cell);
            if (members == null) {
                members = new HashSet<RowKey>();
                values.put(cell, members);
            }
            members.add(rowKey);
        }
    }
    return Pair.create(rowsByBin, rowsByValue);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) Set(java.util.Set) HashSet(java.util.HashSet) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataValue(org.knime.core.data.DataValue) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 28 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class HistogramColumn method loadHistograms.

/**
 * Loads the histograms from the saved internal files and rebuilds the row-key lookup maps.
 *
 * <p>The histogram models and the set of numeric column indices come from {@code histogramsGz}; the table
 * read from {@code dataArrayGz} is then scanned once to assign every row key to its bin (numeric columns)
 * and to its cell value (nominal columns). Nominal column names that are not found in the table are ignored.
 *
 * @param histogramsGz The file for the histograms.
 * @param dataArrayGz The data array file for the row keys.
 * @param nominalColumns The nominal columns.
 * @param strategy The strategy used to compute the bins.
 * @param means The mean values for the numeric columns.
 * @return A triple (Pair(Pair(,),)) of histograms, numeric and nominal row keys.
 * @throws IOException Failed to read the files.
 * @throws InvalidSettingsException Something went wrong.
 */
public static Pair<Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> loadHistograms(final File histogramsGz, final File dataArrayGz, final Set<String> nominalColumns, final BinNumberSelectionStrategy strategy, final double[] means) throws IOException, InvalidSettingsException {
    final Map<Integer, Map<Integer, Set<RowKey>>> numericKeys = new HashMap<Integer, Map<Integer, Set<RowKey>>>();
    // The private loader receives numericKeys and is expected to register the numeric columns in it.
    final Map<Integer, HistogramNumericModel> histograms = loadHistogramsPrivate(histogramsGz, numericKeys, strategy, means);
    final Map<Integer, Map<DataValue, Set<RowKey>>> nominalKeys = new HashMap<Integer, Map<DataValue, Set<RowKey>>>();
    final ContainerTable table = DataContainer.readFromZip(dataArrayGz);

    // Register an empty value map for every nominal column that exists in the loaded table.
    for (final String nominalName : nominalColumns) {
        final int position = table.getDataTableSpec().findColumnIndex(nominalName);
        if (position >= 0) {
            nominalKeys.put(Integer.valueOf(position), new HashMap<DataValue, Set<RowKey>>());
        }
    }

    for (final DataRow row : table) {
        final RowKey rowKey = row.getKey();
        for (final Entry<Integer, Map<Integer, Set<RowKey>>> numericEntry : numericKeys.entrySet()) {
            final Integer column = numericEntry.getKey();
            final DataCell cell = row.getCell(column.intValue());
            if (cell.isMissing() || !(cell instanceof DoubleValue)) {
                continue;
            }
            final Integer bin = Integer.valueOf(histograms.get(column).findBin((DoubleValue) cell));
            final Map<Integer, Set<RowKey>> bins = numericEntry.getValue();
            Set<RowKey> members = bins.get(bin);
            if (members == null) {
                members = new HashSet<RowKey>();
                bins.put(bin, members);
            }
            members.add(rowKey);
        }
        // Note: no missing-value filter here, so a missing cell is recorded as a value of its own.
        for (final Entry<Integer, Map<DataValue, Set<RowKey>>> nominalEntry : nominalKeys.entrySet()) {
            final DataCell cell = row.getCell(nominalEntry.getKey().intValue());
            final Map<DataValue, Set<RowKey>> values = nominalEntry.getValue();
            Set<RowKey> members = values.get(cell);
            if (members == null) {
                members = new HashSet<RowKey>();
                values.put(cell, members);
            }
            members.add(rowKey);
        }
    }

    final Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>> numericPart = new Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>>(histograms, numericKeys);
    return Pair.create(numericPart, nominalKeys);
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataValue(org.knime.core.data.DataValue) DataRow(org.knime.core.data.DataRow) ContainerTable(org.knime.core.data.container.ContainerTable) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 29 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class RankCorrelationComputeNodeModel method filterMissings.

/**
 * Copies all rows that contain no missing cell into a new table with the same spec.
 *
 * @param filteredTable the table to scan.
 * @param exec the execution context used to create the output container.
 * @return the table without any rows containing missing values.
 */
private BufferedDataTable filterMissings(final BufferedDataTable filteredTable, final ExecutionContext exec) {
    final BufferedDataContainer container = exec.createDataContainer(filteredTable.getDataTableSpec());
    rows:
    for (final DataRow candidate : filteredTable) {
        for (final DataCell cell : candidate) {
            if (cell.isMissing()) {
                // one missing cell is enough to drop the whole row
                continue rows;
            }
        }
        container.addRowToTable(candidate);
    }
    container.close();
    return container.getTable();
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Example 30 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class SortedCorrelationComputer method calculateKendall.

/**
 * Calculates a Kendall-type rank correlation for all pairs of columns of the previously calculated rank table.
 *
 * <p>Every row is compared with every row (quadratic in the number of rows) to count, per column pair
 * {@code (i, j)} with {@code i < j}, the concordant pairs, the discordant pairs and the pairs tied in
 * exactly one of the two columns. Depending on {@code corrType} these counts are combined into Kendall's
 * Tau a, Kendall's Tau b or Kruskal's Gamma. For any other {@code corrType} the returned matrix is left at
 * its initial values.
 *
 * <p>Progress is reported in two phases: the pair counting covers {@code [0, 0.95)} and the final
 * combination covers {@code [0.95, 1)}.
 *
 * @param corrType the type of correlation used, as defined in CorrelationComputeNodeModel
 * @param exec the execution monitor used for progress reporting and cancellation checks.
 * @return the output matrix to be turned into the output model
 * @throws CanceledExecutionException if canceled by users
 */
HalfDoubleMatrix calculateKendall(final String corrType, final ExecutionMonitor exec) throws CanceledExecutionException {
    // the ranking must have been calculated before
    assert (m_rank != null);
    final int coCount = m_rank.getDataTableSpec().getNumColumns();
    HalfDoubleMatrix nominatorMatrix = new HalfDoubleMatrix(coCount, /*includeDiagonal=*/false);
    // Only the entries [i][j] with i < j are filled and read.
    double[][] cMatrix = new double[coCount][coCount];
    double[][] dMatrix = new double[coCount][coCount];
    double[][] txMatrix = new double[coCount][coCount];
    double[][] tyMatrix = new double[coCount][coCount];
    final DataCell[] cells = new DataCell[coCount];
    final DataCell[] cells2 = new DataCell[coCount];
    int rowIndex = 0;
    final int rowCount = m_rank.getRowCount();
    for (DataRow r : m_rank) {
        // the cells of the outer row are accessed multiple times, so we buffer them
        for (int i = 0; i < cells.length; i++) {
            cells[i] = r.getCell(i);
        }
        for (DataRow r2 : m_rank) {
            exec.checkCanceled();
            // the cells of the inner row are accessed multiple times, so we buffer them
            for (int i = 0; i < cells2.length; i++) {
                cells2[i] = r2.getCell(i);
            }
            for (int i = 0; i < coCount; i++) {
                final double x1 = ((DoubleValue) cells[i]).getDoubleValue();
                final double x2 = ((DoubleValue) cells2[i]).getDoubleValue();
                // Start at i + 1: the results below only ever read entries with j > i.
                for (int j = i + 1; j < coCount; j++) {
                    final double y1 = ((DoubleValue) cells[j]).getDoubleValue();
                    final double y2 = ((DoubleValue) cells2[j]).getDoubleValue();
                    if (x1 < x2 && y1 < y2) {
                        // values are concordant
                        cMatrix[i][j]++;
                    } else if (x1 < x2 && y1 > y2) {
                        // values are discordant
                        dMatrix[i][j]++;
                    } else if (x1 != x2 && y1 == y2) {
                        // values are tied in y only
                        // NOTE(review): this branch (and the next one) fires for both orderings of a row
                        // pair, whereas concordant/discordant pairs are counted for one ordering only.
                        // That looks like it doubles the tie counts used by Tau b - confirm.
                        tyMatrix[i][j]++;
                    } else if (x1 == x2 && y1 != y2) {
                        // values are tied in x only
                        txMatrix[i][j]++;
                    }
                    // Remaining cases need no count: x1 > x2 with y1 != y2 (the same pair is counted
                    // when the two rows swap roles) and ties in both x and y.
                }
            }
        }
        exec.checkCanceled();
        exec.setProgress(0.95 * rowIndex / rowCount, String.format("Calculating - %d/%d (\"%s\")", rowIndex, rowCount, r.getKey()));
        rowIndex++;
    }
    // The counting phase above ends at 0.95; the remaining 0.05 is added on top of that offset so that
    // the reported progress never moves backwards.
    if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLA)) {
        final double nrOfRows = rowCount;
        // kendalls Tau a
        double divisor = (nrOfRows * (nrOfRows - 1.0)) * 0.5;
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / divisor);
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLB)) {
        // kendalls Tau b
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                double div = Math.sqrt(cMatrix[i][j] + dMatrix[i][j] + txMatrix[i][j]) * Math.sqrt(cMatrix[i][j] + dMatrix[i][j] + tyMatrix[i][j]);
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / div);
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KRUSKALAL)) {
        // Kruskals Gamma
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / (cMatrix[i][j] + dMatrix[i][j]));
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    }
    return nominatorMatrix;
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) HalfDoubleMatrix(org.knime.base.util.HalfDoubleMatrix) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Aggregations

DataCell (org.knime.core.data.DataCell)780 DataRow (org.knime.core.data.DataRow)268 DataTableSpec (org.knime.core.data.DataTableSpec)175 DataColumnSpec (org.knime.core.data.DataColumnSpec)170 DefaultRow (org.knime.core.data.def.DefaultRow)169 ArrayList (java.util.ArrayList)141 StringCell (org.knime.core.data.def.StringCell)131 DoubleCell (org.knime.core.data.def.DoubleCell)129 DoubleValue (org.knime.core.data.DoubleValue)111 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)109 DataType (org.knime.core.data.DataType)97 RowKey (org.knime.core.data.RowKey)94 BufferedDataTable (org.knime.core.node.BufferedDataTable)93 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)91 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)84 LinkedHashMap (java.util.LinkedHashMap)81 IntCell (org.knime.core.data.def.IntCell)79 HashMap (java.util.HashMap)60 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)57 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)56