Search in sources :

Example 26 with DataRow

use of DataRow in project knime-core by knime.

the class HistogramColumn method constructFromDataArray.

 * Constructs the helper data structures from the numeric histogram models and the data as {@link DataArray}.
 * @param histograms The numeric histograms.
 * @param data The input data.
 * @param nominalColumnNames The nominal column names.
 * @return The helper data structures.
 * @see #construct(Map, DataTable, Set)
// Builds two lookup structures keyed by column index: one mapping histogram bin -> row keys for
// numeric columns, and one mapping nominal value -> row keys for nominal columns.
// NOTE(review): this excerpt has lost its closing braces and apparently some statements
// (nothing visible here ever adds a RowKey to the created sets) -- confirm against the full source.
protected static Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> constructFromDataArray(final Map<Integer, HistogramNumericModel> histograms, final DataTable data, final Set<String> nominalColumnNames) {
    // column index -> (bin index -> row keys)
    Map<Integer, Map<Integer, Set<RowKey>>> numericMapping = new HashMap<Integer, Map<Integer, Set<RowKey>>>();
    // column index -> (nominal value -> row keys)
    Map<Integer, Map<DataValue, Set<RowKey>>> nominalMapping = new HashMap<Integer, Map<DataValue, Set<RowKey>>>();
    DataTableSpec tableSpec = data.getDataTableSpec();
    // First pass over the spec: decide which columns get an (initially empty) mapping.
    for (DataColumnSpec colSpec : tableSpec) {
        int colIndex = tableSpec.findColumnIndex(colSpec.getName());
        if (colSpec.getType().isCompatible(DoubleValue.class)) {
            // + colIndex;
            // Only numeric columns that have a non-null histogram model are tracked.
            if (histograms.containsKey(Integer.valueOf(colIndex)) && histograms.get(colIndex) != null) {
                numericMapping.put(colIndex, new HashMap<Integer, Set<RowKey>>());
        // A column is treated as nominal if its domain lists possible values or if the caller
        // named it explicitly.
        if (colSpec.getDomain().hasValues() || nominalColumnNames.contains(colSpec.getName())) {
            nominalMapping.put(colIndex, new HashMap<DataValue, Set<RowKey>>());
    // Second pass over the rows: make sure a row-key set exists for every bin/value encountered.
    for (DataRow dataRow : data) {
        for (Entry<Integer, Map<Integer, Set<RowKey>>> outer : numericMapping.entrySet()) {
            Integer key = outer.getKey();
            DataCell cell = dataRow.getCell(key);
            // Cells that are not DoubleValue are skipped for the numeric mapping.
            if (cell instanceof DoubleValue) {
                DoubleValue dv = (DoubleValue) cell;
                Integer bin = Integer.valueOf(histograms.get(key).findBin(dv));
                Map<Integer, Set<RowKey>> inner = outer.getValue();
                if (!inner.containsKey(bin)) {
                    inner.put(bin, new HashSet<RowKey>());
                // presumably the row's key is added to inner.get(bin) here -- statement not visible in this excerpt
        for (Entry<Integer, Map<DataValue, Set<RowKey>>> outer : nominalMapping.entrySet()) {
            int key = outer.getKey().intValue();
            DataCell cell = dataRow.getCell(key);
            // Missing cells are not recorded as nominal values.
            if (!cell.isMissing()) /* && cell instanceof NominalValue*/
                Map<DataValue, Set<RowKey>> inner = outer.getValue();
                if (!inner.containsKey(cell)) {
                    inner.put(cell, new HashSet<RowKey>());
                // presumably the row's key is added to inner.get(cell) here -- statement not visible in this excerpt
    return Pair.create(numericMapping, nominalMapping);
Also used : DataTableSpec( Set(java.util.Set) HashSet(java.util.HashSet) RowKey( HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataValue( DataRow( DataColumnSpec( DoubleValue( DataCell( Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 27 with DataRow

use of DataRow in project knime-core by knime.

the class HistogramColumn method loadHistograms.

 * Loads the histograms from the saved internal files.
 * @param histogramsGz The file for the histograms.
 * @param dataArrayGz The data array file for the row keys.
 * @param nominalColumns The nominal columns.
 * @param strategy The strategy used to compute the bins.
 * @param means The mean values for the numeric columns.
 * @return A triple (Pair(Pair(,),)) of histograms, numeric and nominal row keys.
 * @throws IOException Failed to read the files.
 * @throws InvalidSettingsException Something went wrong.
// Restores the histogram models from histogramsGz and rebuilds the bin -> row keys (numeric) and
// value -> row keys (nominal) lookups by re-reading the table stored in dataArrayGz.
// NOTE(review): this excerpt has lost its closing braces and some statements (the body of the
// "colIndex < 0" check and the insertion of row keys are not visible) -- confirm against the full source.
public static Pair<Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> loadHistograms(final File histogramsGz, final File dataArrayGz, final Set<String> nominalColumns, final BinNumberSelectionStrategy strategy, final double[] means) throws IOException, InvalidSettingsException {
    // numericKeys is handed to loadHistogramsPrivate; presumably that call registers an entry per
    // numeric column, since its key set drives the numeric loop below -- TODO confirm.
    Map<Integer, Map<Integer, Set<RowKey>>> numericKeys = new HashMap<Integer, Map<Integer, Set<RowKey>>>();
    Map<Integer, HistogramNumericModel> histograms = loadHistogramsPrivate(histogramsGz, numericKeys, strategy, means);
    Map<Integer, Map<DataValue, Set<RowKey>>> nominalKeys = new HashMap<Integer, Map<DataValue, Set<RowKey>>>();
    ContainerTable table = DataContainer.readFromZip(dataArrayGz);
    Set<Integer> numericColIndices = numericKeys.keySet();
    // Register an empty value -> row keys map for every requested nominal column.
    for (String colName : nominalColumns) {
        int colIndex = table.getDataTableSpec().findColumnIndex(colName);
        // Handling of an unknown column name (negative index) is not visible in this excerpt.
        if (colIndex < 0) {
        nominalKeys.put(Integer.valueOf(colIndex), new HashMap<DataValue, Set<RowKey>>());
    for (DataRow dataRow : table) {
        for (Integer col : numericColIndices) {
            // Integer col = Integer.valueOf(colIdx);
            HistogramNumericModel hd = histograms.get(col);
            Map<Integer, Set<RowKey>> map = numericKeys.get(col);
            DataCell cell = dataRow.getCell(col.intValue());
            // Only non-missing numeric cells are assigned to a bin.
            if (!cell.isMissing() && cell instanceof DoubleValue) {
                DoubleValue dv = (DoubleValue) cell;
                Integer bin = Integer.valueOf(hd.findBin(dv));
                if (!map.containsKey(bin)) {
                    map.put(bin, new HashSet<RowKey>());
                // presumably the row's key is added to map.get(bin) here -- statement not visible in this excerpt
        for (Entry<Integer, Map<DataValue, Set<RowKey>>> entry : nominalKeys.entrySet()) {
            DataCell value = dataRow.getCell(entry.getKey().intValue());
            Map<DataValue, Set<RowKey>> map = entry.getValue();
            // NOTE(review): no isMissing() check is visible here, so missing cells would get their
            // own entry -- verify whether that is intended.
            if (!map.containsKey(value)) {
                map.put(value, new HashSet<RowKey>());
            // presumably the row's key is added to map.get(value) here -- statement not visible in this excerpt
    // Result is nested as ((histograms, numericKeys), nominalKeys).
    return Pair.create(new Pair<Map<Integer, ? extends HistogramModel<?>>, Map<Integer, Map<Integer, Set<RowKey>>>>(histograms, numericKeys), nominalKeys);
Also used : Set(java.util.Set) HashSet(java.util.HashSet) RowKey( HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataValue( DataRow( ContainerTable( DoubleValue( DataCell( Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 28 with DataRow

use of DataRow in project knime-core by knime.

the class RankCorrelationComputeNodeModel method filterMissings.

 * @param filteredTable a Buffered Data Table.
 * @param exec The execution context
 * @return the table without any rows containing missing values.
// Copies filteredTable into a new container with the same spec, skipping rows that contain a missing cell.
// NOTE(review): this excerpt has lost its closing braces and the body of "if (includeRow)";
// presumably the row is added to the container there, and the container is closed before
// getTable() is called -- confirm against the full source.
private BufferedDataTable filterMissings(final BufferedDataTable filteredTable, final ExecutionContext exec) {
    BufferedDataContainer tab = exec.createDataContainer(filteredTable.getDataTableSpec());
    for (DataRow row : filteredTable) {
        boolean includeRow = true;
        // check the row for missing values; any missing cell excludes the row
        for (DataCell cell : row) {
            if (cell.isMissing()) {
                includeRow = false;
        if (includeRow) {
    return tab.getTable();
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataCell( DataRow(

Example 29 with DataRow

use of DataRow in project knime-core by knime.

the class SortedCorrelationComputer method calculateKendall.

 * Calculates the Kendall rank for all pairs of data table columns based on previously calculated ranks.
 * @param exec the Execution context.
 * @param corrType the type of correlation used, as defined in CorrelationComputeNodeModel
 * @return the output matrix to be turned into the output model
 * @throws CanceledExecutionException if canceled by users
// Compares every row of m_rank with every row of m_rank, classifies each column pair (i, j) of the
// two rows, and then derives the requested coefficient (Kendall's tau a, tau b or Kruskal's gamma)
// from the concordant (cMatrix) and discordant (dMatrix) counts and the tie counts (txMatrix, tyMatrix).
// NOTE(review): this excerpt is truncated -- the HalfDoubleMatrix constructor call is cut off, the
// counter increments inside the classification branches, the rowIndex increment and all closing
// braces are not visible. Confirm details against the full source.
HalfDoubleMatrix calculateKendall(final String corrType, final ExecutionMonitor exec) throws CanceledExecutionException {
    // the ranking must have been calculated before
    assert (m_rank != null);
    final int coCount = m_rank.getDataTableSpec().getNumColumns();
    HalfDoubleMatrix nominatorMatrix = new HalfDoubleMatrix(coCount, /*includeDiagonal=*/
    // Per column pair: presumably cMatrix counts concordant pairs, dMatrix discordant pairs,
    // txMatrix/tyMatrix pairs tied in one variable only (the increments are not visible here).
    double[][] cMatrix = new double[coCount][coCount];
    double[][] dMatrix = new double[coCount][coCount];
    double[][] txMatrix = new double[coCount][coCount];
    double[][] tyMatrix = new double[coCount][coCount];
    // double[][] txyMatrix = new double[coCount][coCount];
    final DataCell[] cells = new DataCell[m_rank.getDataTableSpec().getNumColumns()];
    final DataCell[] cells2 = new DataCell[m_rank.getDataTableSpec().getNumColumns()];
    int rowIndex = 0;
    final int rowCount = m_rank.getRowCount();
    // The table is iterated in a nested fashion, so every ordered pair of rows (r, r2) is visited.
    for (DataRow r : m_rank) {
        // the cells of the outer row are accessed multiple times, so we buffer them
        for (int i = 0; i < cells.length; i++) {
            cells[i] = r.getCell(i);
        for (DataRow r2 : m_rank) {
            // the cells of the inner row are accessed multiple times, so we buffer them
            for (int i = 0; i < cells2.length; i++) {
                cells2[i] = r2.getCell(i);
            // All cells are cast to DoubleValue without a check; assumes m_rank holds only
            // non-missing numeric cells -- TODO confirm.
            for (int i = 0; i < coCount; i++) {
                final double x1 = ((DoubleValue) cells[i]).getDoubleValue();
                final double x2 = ((DoubleValue) cells2[i]).getDoubleValue();
                for (int j = 0; j < coCount; j++) {
                    final double y1 = ((DoubleValue) cells[j]).getDoubleValue();
                    final double y2 = ((DoubleValue) cells2[j]).getDoubleValue();
                    // Only x1 < x2 is tested for (dis)concordance; since each pair of rows is
                    // visited in both orders, this presumably counts each unordered pair once.
                    if (x1 < x2 && y1 < y2) {
                        // values are concordant
                    } else if (x1 < x2 && y1 > y2) {
                        // values are discordant
                    } else if (x1 != x2 && y1 == y2) {
                        // values are bounded in y
                    } else if (x1 == x2 && y1 != y2) {
                        // values are bounded in x
                    } else {
                    // (x1 == x2 && y1 == y2) { values are bounded in x and y
                    // txyMatrix[i][j]++; // no measure need this count
        // The pairwise counting phase reports up to 95% of the progress.
        exec.setProgress(0.95 * rowIndex / rowCount, String.format("Calculating - %d/%d (\"%s\")", rowIndex, rowCount, r.getKey()));
    if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLA)) {
        double nrOfRows = m_rank.getRowCount();
        // kendalls Tau a: (concordant - discordant) / (n * (n - 1) / 2)
        double divisor = (nrOfRows * (nrOfRows - 1.0)) * 0.5;
        for (int i = 0; i < coCount; i++) {
            // only the upper triangle (j > i) is filled
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / divisor);
            exec.setProgress(0.05 * i / coCount, "Calculating correlations");
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KENDALLB)) {
        // kendalls Tau b: (c - d) / (sqrt(c + d + tx) * sqrt(c + d + ty))
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                double div = Math.sqrt(cMatrix[i][j] + dMatrix[i][j] + txMatrix[i][j]) * Math.sqrt(cMatrix[i][j] + dMatrix[i][j] + tyMatrix[i][j]);
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / div);
            exec.setProgress(0.05 * i / coCount, "Calculating correlations");
    } else if (corrType.equals(RankCorrelationComputeNodeModel.CFG_KRUSKALAL)) {
        // Kruskals Gamma: (c - d) / (c + d); a zero denominator is not guarded against here
        for (int i = 0; i < coCount; i++) {
            for (int j = i + 1; j < coCount; j++) {
                nominatorMatrix.set(i, j, (cMatrix[i][j] - dMatrix[i][j]) / (cMatrix[i][j] + dMatrix[i][j]));
            exec.setProgress(0.05 * i / coCount, "Calculating correlations");
    // For any other corrType none of the branches above runs and the matrix is returned as constructed.
    return nominatorMatrix;
Also used : DoubleValue( HalfDoubleMatrix(org.knime.base.util.HalfDoubleMatrix) DataCell( DataRow(

Example 30 with DataRow

use of DataRow in project knime-core by knime.

the class NewToOldTimeNodeModel method createStreamableOperator.

 * {@inheritDoc}
// Returns a streaming operator that converts the selected columns of each incoming row and pushes
// the row on with the converted cells either replacing the originals or appended.
// NOTE(review): this excerpt has lost its closing braces (and apparently annotations), and the
// includeIndeces initializer is truncated -- confirm against the full source.
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        // This operator returns no internals.
        public StreamableOperatorInternals saveInternals() {
            return null;

        // Polls rows from the first input until it returns null and pushes one output row per input row.
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            final RowInput in = (RowInput) inputs[0];
            final RowOutput out = (RowOutput) outputs[0];
            final DataTableSpec inSpec = in.getDataTableSpec();
            String[] includeList = m_colSelect.applyTo(inSpec).getIncludes();
            // The expression below is truncated in this excerpt; presumably it maps each name in
            // includeList to its column index in inSpec -- TODO confirm.
            final int[] includeIndeces = -> inSpec.findColumnIndex(s)).toArray();
            DataRow row;
            while ((row = in.poll()) != null) {
                // One converted cell per included column, in includeList order.
                DataCell[] datacells = new DataCell[includeIndeces.length];
                for (int i = 0; i < includeIndeces.length; i++) {
                    if (m_isReplaceOrAppend.getStringValue().equals(OPTION_REPLACE)) {
                        // Replace mode: the new column keeps the original column name.
                        final DataColumnSpecCreator dataColumnSpecCreator = new DataColumnSpecCreator(includeList[i], DateAndTimeCell.TYPE);
                        final ConvertTimeCellFactory cellFac = new ConvertTimeCellFactory(dataColumnSpecCreator.createSpec(), includeIndeces[i]);
                        datacells[i] = cellFac.getCells(row)[0];
                    } else {
                        // Append mode: the new column is named original name + suffix, passed
                        // through a UniqueNameGenerator built on the input spec.
                        final DataColumnSpec dataColSpec = new UniqueNameGenerator(inSpec).newColumn(includeList[i] + m_suffix.getStringValue(), DateAndTimeCell.TYPE);
                        final ConvertTimeCellFactory cellFac = new ConvertTimeCellFactory(dataColSpec, includeIndeces[i]);
                        datacells[i] = cellFac.getCells(row)[0];
                // Emit the row with the converted cells either substituted in place or appended.
                if (m_isReplaceOrAppend.getStringValue().equals(OPTION_REPLACE)) {
                    out.push(new ReplacedColumnsDataRow(row, datacells, includeIndeces));
                } else {
                    out.push(new AppendedColumnRow(row, datacells));
Also used : Arrays(java.util.Arrays) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) DataTableSpec( InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ZonedDateTime(java.time.ZonedDateTime) UniqueNameGenerator(org.knime.core.util.UniqueNameGenerator) LocalDateTime(java.time.LocalDateTime) AppendedColumnRow( LocalTimeValue( LocalDateValue( ExecutionContext(org.knime.core.node.ExecutionContext) SingleCellFactory( StreamableOperatorInternals(org.knime.core.node.streamable.StreamableOperatorInternals) DataColumnSpec( DataColumnSpecCreator( LocalTime(java.time.LocalTime) ZoneOffset(java.time.ZoneOffset) DataCell( ZonedDateTimeValue( PortInput(org.knime.core.node.streamable.PortInput) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) PartitionInfo(org.knime.core.node.streamable.PartitionInfo) RowInput(org.knime.core.node.streamable.RowInput) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) IOException( SettingsModelColumnFilter2(org.knime.core.node.defaultnodesettings.SettingsModelColumnFilter2) OutputPortRole(org.knime.core.node.streamable.OutputPortRole) ReplacedColumnsDataRow( NodeModel(org.knime.core.node.NodeModel) File( DataRow( SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) PortOutput(org.knime.core.node.streamable.PortOutput) NodeSettingsWO(org.knime.core.node.NodeSettingsWO) TimeUnit(java.util.concurrent.TimeUnit) BufferedDataTable(org.knime.core.node.BufferedDataTable) LocalDateTimeValue( InputPortRole(org.knime.core.node.streamable.InputPortRole) LocalDate(java.time.LocalDate) ColumnRearranger( DateAndTimeCell( RowOutput(org.knime.core.node.streamable.RowOutput) StreamableOperator(org.knime.core.node.streamable.StreamableOperator) DataTableSpec( DataColumnSpecCreator( StreamableOperator(org.knime.core.node.streamable.StreamableOperator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) 
RowInput(org.knime.core.node.streamable.RowInput) ReplacedColumnsDataRow( DataRow( UniqueNameGenerator(org.knime.core.util.UniqueNameGenerator) RowOutput(org.knime.core.node.streamable.RowOutput) ExecutionContext(org.knime.core.node.ExecutionContext) DataColumnSpec( DataCell( ReplacedColumnsDataRow( AppendedColumnRow(


DataRow ( DataCell ( DataTableSpec ( BufferedDataTable (org.knime.core.node.BufferedDataTable)125 DataColumnSpec ( RowKey ( DefaultRow ( BufferedDataContainer (org.knime.core.node.BufferedDataContainer)80 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)76 ColumnRearranger ( DoubleValue ( ArrayList (java.util.ArrayList)65 DataColumnSpecCreator ( RowIterator ( DataType ( DoubleCell ( StringCell ( SingleCellFactory ( ExecutionMonitor (org.knime.core.node.ExecutionMonitor)44 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)43