Search in sources :

Example 26 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class HistogramColumn method constructFromDataArray.

/**
 * Builds the row key lookup structures for the numeric histogram models and the nominal columns of a
 * {@link DataTable}.
 * <p>
 * Numeric part: every {@link DoubleValue}-compatible column that has a non-{@code null} model in
 * {@code histograms} (looked up by column index) gets a map from bin index to the keys of the rows whose value
 * falls into that bin. Cells that are not a {@link DoubleValue} are skipped.
 * <p>
 * Nominal part: every column that has domain values or whose name is contained in {@code nominalColumnNames}
 * gets a map from cell value to the keys of the rows holding that value. Missing cells are skipped.
 *
 * @param histograms The numeric histograms.
 * @param data The input data.
 * @param nominalColumnNames The nominal column names.
 * @return The helper data structures: the numeric mapping first, the nominal mapping second.
 * @see #construct(Map, DataTable, Set)
 */
protected static Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> constructFromDataArray(final Map<Integer, HistogramNumericModel> histograms, final DataTable data, final Set<String> nominalColumnNames) {
    final Map<Integer, Map<Integer, Set<RowKey>>> binKeysByColumn = new HashMap<>();
    final Map<Integer, Map<DataValue, Set<RowKey>>> valueKeysByColumn = new HashMap<>();
    final DataTableSpec spec = data.getDataTableSpec();

    // Step 1: decide which columns take part and register an empty lookup for each of them.
    for (final DataColumnSpec column : spec) {
        final String columnName = column.getName();
        final int columnIndex = spec.findColumnIndex(columnName);
        final boolean numeric = column.getType().isCompatible(DoubleValue.class);
        // a non-null lookup result implies that the key is present
        if (numeric && histograms.get(columnIndex) != null) {
            binKeysByColumn.put(columnIndex, new HashMap<Integer, Set<RowKey>>());
        }
        final boolean nominal = column.getDomain().hasValues() || nominalColumnNames.contains(columnName);
        if (nominal) {
            valueKeysByColumn.put(columnIndex, new HashMap<DataValue, Set<RowKey>>());
        }
    }

    // Step 2: a single pass over the rows fills both lookups.
    for (final DataRow row : data) {
        for (final Entry<Integer, Map<Integer, Set<RowKey>>> numericEntry : binKeysByColumn.entrySet()) {
            final Integer columnIndex = numericEntry.getKey();
            final DataCell cell = row.getCell(columnIndex);
            if (!(cell instanceof DoubleValue)) {
                continue;
            }
            final Integer bin = Integer.valueOf(histograms.get(columnIndex).findBin((DoubleValue) cell));
            numericEntry.getValue().computeIfAbsent(bin, unused -> new HashSet<RowKey>()).add(row.getKey());
        }
        for (final Entry<Integer, Map<DataValue, Set<RowKey>>> nominalEntry : valueKeysByColumn.entrySet()) {
            final DataCell cell = row.getCell(nominalEntry.getKey().intValue());
            if (cell.isMissing()) {
                continue;
            }
            nominalEntry.getValue().computeIfAbsent(cell, unused -> new HashSet<RowKey>()).add(row.getKey());
        }
    }
    return Pair.create(binKeysByColumn, valueKeysByColumn);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) Set(java.util.Set) HashSet(java.util.HashSet) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataValue(org.knime.core.data.DataValue) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 27 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class HistogramColumn method loadHistograms.

/**
 * Restores the histograms and the row key lookups from the saved internal files.
 * <p>
 * The numeric models are read through {@code loadHistogramsPrivate}; the table stored in {@code dataArrayGz} is
 * then scanned once to assign each row key to the bin (numeric columns) or to the cell value (nominal columns)
 * it belongs to. Nominal column names that are not found in the stored table are ignored. Note that the nominal
 * part does not filter missing cells, so a missing cell is used as a lookup key like any other value.
 *
 * @param histogramsGz The file for the histograms.
 * @param dataArrayGz The data array file for the row keys.
 * @param nominalColumns The nominal columns.
 * @param strategy The strategy used to compute the bins.
 * @param means The mean values for the numeric columns.
 * @return A triple (Pair(Pair(,),)) of histograms, numeric and nominal row keys.
 * @throws IOException Failed to read the files.
 * @throws InvalidSettingsException Something went wrong.
 */
public static Pair<Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> loadHistograms(final File histogramsGz, final File dataArrayGz, final Set<String> nominalColumns, final BinNumberSelectionStrategy strategy, final double[] means) throws IOException, InvalidSettingsException {
    final Map<Integer, Map<Integer, Set<RowKey>>> binKeysByColumn = new HashMap<>();
    final Map<Integer, HistogramNumericModel> models = loadHistogramsPrivate(histogramsGz, binKeysByColumn, strategy, means);
    final Map<Integer, Map<DataValue, Set<RowKey>>> valueKeysByColumn = new HashMap<>();
    final ContainerTable storedTable = DataContainer.readFromZip(dataArrayGz);

    // register an empty lookup for every nominal column that exists in the stored table
    for (final String nominalName : nominalColumns) {
        final int columnIndex = storedTable.getDataTableSpec().findColumnIndex(nominalName);
        if (columnIndex >= 0) {
            valueKeysByColumn.put(Integer.valueOf(columnIndex), new HashMap<DataValue, Set<RowKey>>());
        }
    }

    for (final DataRow row : storedTable) {
        // numeric columns: row key goes into the bin selected by the column's histogram model
        for (final Entry<Integer, Map<Integer, Set<RowKey>>> numericEntry : binKeysByColumn.entrySet()) {
            final Integer columnIndex = numericEntry.getKey();
            final DataCell cell = row.getCell(columnIndex.intValue());
            if (cell.isMissing() || !(cell instanceof DoubleValue)) {
                continue;
            }
            final Integer bin = Integer.valueOf(models.get(columnIndex).findBin((DoubleValue) cell));
            numericEntry.getValue().computeIfAbsent(bin, unused -> new HashSet<RowKey>()).add(row.getKey());
        }
        // nominal columns: row key goes under the cell itself (missing cells included)
        for (final Entry<Integer, Map<DataValue, Set<RowKey>>> nominalEntry : valueKeysByColumn.entrySet()) {
            final DataCell cell = row.getCell(nominalEntry.getKey().intValue());
            nominalEntry.getValue().computeIfAbsent(cell, unused -> new HashSet<RowKey>()).add(row.getKey());
        }
    }

    final Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>> numericPart = new Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>>(models, binKeysByColumn);
    return Pair.create(numericPart, valueKeysByColumn);
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataValue(org.knime.core.data.DataValue) DataRow(org.knime.core.data.DataRow) ContainerTable(org.knime.core.data.container.ContainerTable) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 28 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class RankCorrelationComputeNodeModel method filterMissings.

/**
 * Copies all rows that contain no missing cell into a new table with the same spec.
 *
 * @param filteredTable a Buffered Data Table.
 * @param exec The execution context used to create the output container.
 * @return the table without any rows containing missing values.
 */
private BufferedDataTable filterMissings(final BufferedDataTable filteredTable, final ExecutionContext exec) {
    final BufferedDataContainer completeRows = exec.createDataContainer(filteredTable.getDataTableSpec());
    for (final DataRow candidate : filteredTable) {
        if (!hasMissingCell(candidate)) {
            completeRows.addRowToTable(candidate);
        }
    }
    completeRows.close();
    return completeRows.getTable();
}

/**
 * Tells whether at least one cell of the row is missing; stops at the first missing cell.
 *
 * @param row the row to inspect
 * @return {@code true} if the row contains a missing cell
 */
private static boolean hasMissingCell(final DataRow row) {
    for (final DataCell cell : row) {
        if (cell.isMissing()) {
            return true;
        }
    }
    return false;
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Example 29 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class SortedCorrelationComputer method calculateKendall.

/**
 * Calculates a Kendall-type rank correlation for all pairs of data table columns based on the previously
 * calculated ranks in {@code m_rank}.
 * <p>
 * All rows are compared pairwise. For every column pair (i, j) with i &lt; j the method counts concordant (C),
 * discordant (D), tied-in-y-only (Ty) and tied-in-x-only (Tx) row pairs, each unordered row pair exactly once,
 * and derives from them:
 * <ul>
 * <li>Kendall's Tau A: (C - D) / (n * (n - 1) / 2)</li>
 * <li>Kendall's Tau B: (C - D) / (sqrt(C + D + Tx) * sqrt(C + D + Ty))</li>
 * <li>Kruskal's Gamma: (C - D) / (C + D)</li>
 * </ul>
 * If {@code corrType} matches none of the three known constants, the matrix is returned without any value
 * having been set. A zero denominator is not handled specially and yields the result of the floating point
 * division.
 *
 * @param corrType the type of correlation used, as defined in CorrelationComputeNodeModel
 * @param exec the Execution context.
 * @return the output matrix to be turned into the output model
 * @throws CanceledExecutionException if canceled by users
 */
HalfDoubleMatrix calculateKendall(final String corrType, final ExecutionMonitor exec) throws CanceledExecutionException {
    // the ranking must have been calculated before
    assert (m_rank != null);
    final int coCount = m_rank.getDataTableSpec().getNumColumns();
    HalfDoubleMatrix nominatorMatrix = new HalfDoubleMatrix(coCount, /*includeDiagonal=*/
    false);
    // only the entries with i < j are filled and read
    double[][] cMatrix = new double[coCount][coCount];
    double[][] dMatrix = new double[coCount][coCount];
    double[][] txMatrix = new double[coCount][coCount];
    double[][] tyMatrix = new double[coCount][coCount];
    final DataCell[] cells = new DataCell[coCount];
    final DataCell[] cells2 = new DataCell[coCount];
    int rowIndex = 0;
    final int rowCount = m_rank.getRowCount();
    for (DataRow r : m_rank) {
        // the cells of the outer row are read multiple times, so we buffer them
        for (int i = 0; i < coCount; i++) {
            cells[i] = r.getCell(i);
        }
        for (DataRow r2 : m_rank) {
            exec.checkCanceled();
            // the cells of the inner row are read multiple times, so we buffer them
            for (int i = 0; i < coCount; i++) {
                cells2[i] = r2.getCell(i);
            }
            for (int i = 0; i < coCount; i++) {
                final double x1 = ((DoubleValue) cells[i]).getDoubleValue();
                final double x2 = ((DoubleValue) cells2[i]).getDoubleValue();
                // the result matrix has no diagonal and is symmetric, so only j > i is needed
                for (int j = i + 1; j < coCount; j++) {
                    final double y1 = ((DoubleValue) cells[j]).getDoubleValue();
                    final double y2 = ((DoubleValue) cells2[j]).getDoubleValue();
                    // Every unordered row pair is visited twice, as (r, r2) and as (r2, r). Each branch
                    // accepts only one of the two orientations, so every pair is counted exactly once.
                    // Counting ties in both orientations would double Tx/Ty relative to C/D and
                    // distort Kendall's Tau B.
                    if (x1 < x2 && y1 < y2) {
                        // values are concordant
                        cMatrix[i][j]++;
                    } else if (x1 < x2 && y1 > y2) {
                        // values are discordant
                        dMatrix[i][j]++;
                    } else if (x1 < x2 && y1 == y2) {
                        // values are tied in y only
                        tyMatrix[i][j]++;
                    } else if (x1 == x2 && y1 < y2) {
                        // values are tied in x only
                        txMatrix[i][j]++;
                    }
                    // remaining cases: the mirrored orientation of a pair counted above, or a pair tied
                    // in both x and y (which no measure needs)
                }
            }
        }
        exec.checkCanceled();
        exec.setProgress(0.95 * rowIndex / rowCount, String.format("Calculating - %d/%d (\"%s\")", rowIndex, rowCount, r.getKey()));
        rowIndex++;
    }
    // The pairwise comparison above accounts for the first 95% of the progress; the remaining 5% are
    // reported on top of that so that the progress never moves backwards.
    if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLA)) {
        double nrOfRows = m_rank.getRowCount();
        // kendalls Tau a
        double divisor = (nrOfRows * (nrOfRows - 1.0)) * 0.5;
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / divisor);
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLB)) {
        // kendalls Tau b
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                double div = Math.sqrt(cMatrix[i][j] + dMatrix[i][j] + txMatrix[i][j]) * Math.sqrt(cMatrix[i][j] + dMatrix[i][j] + tyMatrix[i][j]);
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / div);
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KRUSKALAL)) {
        // Kruskals Gamma
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / (cMatrix[i][j] + dMatrix[i][j]));
            }
            exec.setProgress(0.95 + 0.05 * i / coCount, "Calculating correlations");
        }
    }
    return nominatorMatrix;
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) HalfDoubleMatrix(org.knime.base.util.HalfDoubleMatrix) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Example 30 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class NewToOldTimeNodeModel method createStreamableOperator.

/**
 * {@inheritDoc}
 * <p>
 * The returned operator converts the selected columns of every incoming row with a
 * {@code ConvertTimeCellFactory} and, depending on the replace/append setting, pushes either a row with the
 * converted cells replacing the originals or a row with the converted cells appended.
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        @Override
        public StreamableOperatorInternals saveInternals() {
            return null;
        }

        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            final RowInput in = (RowInput) inputs[0];
            final RowOutput out = (RowOutput) outputs[0];
            final DataTableSpec inSpec = in.getDataTableSpec();
            final String[] includeList = m_colSelect.applyTo(inSpec).getIncludes();
            final int[] includeIndices = Arrays.stream(includeList).mapToInt(inSpec::findColumnIndex).toArray();

            // Everything below depends only on the settings and the input spec, not on the row, so it is
            // prepared once instead of once per row and column.
            final boolean replace = m_isReplaceOrAppend.getStringValue().equals(OPTION_REPLACE);
            // NOTE(review): assumes a ConvertTimeCellFactory can be reused across rows, as cell factories
            // usually are - confirm against its implementation.
            final ConvertTimeCellFactory[] factories = new ConvertTimeCellFactory[includeIndices.length];
            for (int i = 0; i < includeIndices.length; i++) {
                final DataColumnSpec convertedSpec;
                if (replace) {
                    convertedSpec = new DataColumnSpecCreator(includeList[i], DateAndTimeCell.TYPE).createSpec();
                } else {
                    // a fresh generator per column, so each name is made unique against the input spec only
                    convertedSpec = new UniqueNameGenerator(inSpec).newColumn(includeList[i] + m_suffix.getStringValue(), DateAndTimeCell.TYPE);
                }
                factories[i] = new ConvertTimeCellFactory(convertedSpec, includeIndices[i]);
            }

            DataRow row;
            while ((row = in.poll()) != null) {
                exec.checkCanceled();
                final DataCell[] datacells = new DataCell[factories.length];
                for (int i = 0; i < factories.length; i++) {
                    datacells[i] = factories[i].getCells(row)[0];
                }
                if (replace) {
                    out.push(new ReplacedColumnsDataRow(row, datacells, includeIndices));
                } else {
                    out.push(new AppendedColumnRow(row, datacells));
                }
            }
            in.close();
            out.close();
        }
    };
}
Also used : Arrays(java.util.Arrays) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) DataTableSpec(org.knime.core.data.DataTableSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ZonedDateTime(java.time.ZonedDateTime) UniqueNameGenerator(org.knime.core.util.UniqueNameGenerator) LocalDateTime(java.time.LocalDateTime) AppendedColumnRow(org.knime.core.data.append.AppendedColumnRow) LocalTimeValue(org.knime.core.data.time.localtime.LocalTimeValue) LocalDateValue(org.knime.core.data.time.localdate.LocalDateValue) ExecutionContext(org.knime.core.node.ExecutionContext) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) StreamableOperatorInternals(org.knime.core.node.streamable.StreamableOperatorInternals) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) LocalTime(java.time.LocalTime) ZoneOffset(java.time.ZoneOffset) DataCell(org.knime.core.data.DataCell) ZonedDateTimeValue(org.knime.core.data.time.zoneddatetime.ZonedDateTimeValue) PortInput(org.knime.core.node.streamable.PortInput) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) PartitionInfo(org.knime.core.node.streamable.PartitionInfo) RowInput(org.knime.core.node.streamable.RowInput) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) IOException(java.io.IOException) SettingsModelColumnFilter2(org.knime.core.node.defaultnodesettings.SettingsModelColumnFilter2) OutputPortRole(org.knime.core.node.streamable.OutputPortRole) ReplacedColumnsDataRow(org.knime.base.data.replace.ReplacedColumnsDataRow) NodeModel(org.knime.core.node.NodeModel) File(java.io.File) DataRow(org.knime.core.data.DataRow) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) PortOutput(org.knime.core.node.streamable.PortOutput) NodeSettingsWO(org.knime.core.node.NodeSettingsWO) TimeUnit(java.util.concurrent.TimeUnit) 
BufferedDataTable(org.knime.core.node.BufferedDataTable) LocalDateTimeValue(org.knime.core.data.time.localdatetime.LocalDateTimeValue) InputPortRole(org.knime.core.node.streamable.InputPortRole) LocalDate(java.time.LocalDate) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DateAndTimeCell(org.knime.core.data.date.DateAndTimeCell) RowOutput(org.knime.core.node.streamable.RowOutput) StreamableOperator(org.knime.core.node.streamable.StreamableOperator) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) StreamableOperator(org.knime.core.node.streamable.StreamableOperator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) RowInput(org.knime.core.node.streamable.RowInput) ReplacedColumnsDataRow(org.knime.base.data.replace.ReplacedColumnsDataRow) DataRow(org.knime.core.data.DataRow) UniqueNameGenerator(org.knime.core.util.UniqueNameGenerator) RowOutput(org.knime.core.node.streamable.RowOutput) ExecutionContext(org.knime.core.node.ExecutionContext) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell) ReplacedColumnsDataRow(org.knime.base.data.replace.ReplacedColumnsDataRow) AppendedColumnRow(org.knime.core.data.append.AppendedColumnRow)

Aggregations

DataRow (org.knime.core.data.DataRow)482 DataCell (org.knime.core.data.DataCell)268 DataTableSpec (org.knime.core.data.DataTableSpec)159 BufferedDataTable (org.knime.core.node.BufferedDataTable)125 DataColumnSpec (org.knime.core.data.DataColumnSpec)109 RowKey (org.knime.core.data.RowKey)88 DefaultRow (org.knime.core.data.def.DefaultRow)88 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)80 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)76 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)73 DoubleValue (org.knime.core.data.DoubleValue)72 ArrayList (java.util.ArrayList)65 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)65 RowIterator (org.knime.core.data.RowIterator)62 DataType (org.knime.core.data.DataType)61 DoubleCell (org.knime.core.data.def.DoubleCell)57 StringCell (org.knime.core.data.def.StringCell)53 SingleCellFactory (org.knime.core.data.container.SingleCellFactory)48 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)44 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)43