Search in sources :

Example 21 with Pair

use of org.knime.core.util.Pair in project knime-core by knime.

the class PivotNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * Pivots the first input table: every distinct value of the group column becomes one output row and
 * every distinct value of the pivot column becomes one output column. Each cell holds the result of the
 * selected aggregation method over the rows sharing that (group, pivot) combination; when no aggregation
 * column is selected the {@link PivotAggregationMethod#COUNT} method is used.
 *
 * @param inData the input tables; only the table at index 0 is read
 * @param exec context used for progress reporting, cancellation checks and container creation
 * @return a single-element array holding the pivoted table
 * @throws Exception if the execution is canceled or fails
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    final DataTableSpec inSpec = input.getDataTableSpec();
    final int groupIdx = inSpec.findColumnIndex(m_group.getStringValue());
    final int pivotIdx = inSpec.findColumnIndex(m_pivot.getStringValue());

    // a negative aggregation index means "no aggregation column, just count"
    int aggIdx = -1;
    if (m_makeAgg.getStringValue().equals(PivotNodeDialogPane.MAKE_AGGREGATION[1])) {
        aggIdx = inSpec.findColumnIndex(m_agg.getStringValue());
    }
    final PivotAggregationMethod aggMethod = aggIdx < 0
        ? PivotAggregationMethod.COUNT
        : PivotAggregationMethod.METHODS.get(m_aggMethod.getStringValue());

    // intermediate aggregation state, keyed by (group value, pivot value)
    final Map<Pair<String, String>, Double[]> aggregates = new LinkedHashMap<>();

    // pivot values in encounter order, seeded with the domain values of the pivot column (if any)
    final Set<String> pivotValues = new LinkedHashSet<>();
    final DataColumnSpec pivotColSpec = inSpec.getColumnSpec(pivotIdx);
    if (pivotColSpec.getDomain().hasValues()) {
        for (final DataCell domainCell : pivotColSpec.getDomain().getValues()) {
            pivotValues.add(domainCell.toString());
        }
    }

    // group values in encounter order
    final Set<String> groupValues = new LinkedHashSet<>();
    // group row key -> keys of the input rows that belong to it (filled only when hiliting is enabled)
    final LinkedHashMap<RowKey, Set<RowKey>> hiliteMapping = new LinkedHashMap<>();

    final int totalRows = input.getRowCount();
    int processedRows = 0;
    final ExecutionContext aggExec = exec.createSubExecutionContext(0.75);

    // find all (group, pivot) pairs and aggregate the values of each of them
    for (final DataRow row : input) {
        aggExec.checkCanceled();
        processedRows++;
        aggExec.setProgress(processedRows / (double) totalRows, "Aggregating row: \"" + row.getKey().getString()
            + "\" (" + processedRows + "\\" + totalRows + ")");

        final String groupValue = row.getCell(groupIdx).toString();
        groupValues.add(groupValue);

        final DataCell pivotCell = row.getCell(pivotIdx);
        // rows with a missing pivot value are skipped when missing values should be ignored
        if (pivotCell.isMissing() && m_ignoreMissValues.getBooleanValue()) {
            continue;
        }
        final String pivotValue = pivotCell.toString();
        pivotValues.add(pivotValue);

        final Pair<String, String> key = new Pair<>(groupValue, pivotValue);
        Double[] state = aggregates.get(key);
        if (state == null) {
            state = aggMethod.init();
            aggregates.put(key, state);
        }
        // without an aggregation column there is no value to pass on
        final DataCell aggCell = aggIdx < 0 ? null : row.getCell(aggIdx);
        aggMethod.compute(state, aggCell);

        if (m_hiliting.getBooleanValue()) {
            final RowKey groupKey = new RowKey(groupValue);
            Set<RowKey> members = hiliteMapping.get(groupKey);
            if (members == null) {
                members = new LinkedHashSet<>();
                hiliteMapping.put(groupKey, members);
            }
            members.add(row.getKey());
        }
    }

    final DataTableSpec outSpec = initSpec(pivotValues);
    // will contain the final pivoting table
    final BufferedDataContainer container = exec.createDataContainer(outSpec);
    final int totalGroups = groupValues.size();
    int processedGroups = 0;
    final ExecutionContext outExec = exec.createSubExecutionContext(0.25);

    for (final String groupValue : groupValues) {
        outExec.checkCanceled();
        processedGroups++;
        outExec.setProgress(processedGroups / (double) totalGroups, "Computing aggregation of group \""
            + groupValue + "\" (" + processedGroups + "\\" + totalGroups + ")");

        // one result cell per pivot value, in pivot order
        final DataCell[] cells = new DataCell[pivotValues.size()];
        int col = 0;
        for (final String pivotValue : pivotValues) {
            final Double[] state = aggregates.get(new Pair<>(groupValue, pivotValue));
            cells[col++] = aggMethod.done(state);
        }
        // create new row with the given group id and aggregation values
        container.addRowToTable(new DefaultRow(groupValue, cells));
    }
    container.close();

    if (m_hiliting.getBooleanValue()) {
        m_translator.setMapper(new DefaultHiLiteMapper(hiliteMapping));
    }
    return new BufferedDataTable[] { container.getTable() };
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper) Pair(org.knime.core.util.Pair) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 22 with Pair

use of org.knime.core.util.Pair in project knime-core by knime.

the class Workflow method changeSourcePortsForMetaNode.

/**
 * Computes the connections that have to be replaced when the output ports of a metanode are re-indexed.
 * For every connection leaving the metanode, the entry of {@code newPorts} whose old index equals the
 * connection's source port is looked up and a copy of the connection starting at the new index is created.
 *
 * @param metaNodeID ID of the metanode
 * @param newPorts The new ports
 * @param includeUnchanged If connections that will not change should be included
 * @return List of pairs of original (first) and changed (second) connections
 * @throws IllegalStateException if a connected source port has no entry in {@code newPorts}
 */
List<Pair<ConnectionContainer, ConnectionContainer>> changeSourcePortsForMetaNode(final NodeID metaNodeID, final MetaPortInfo[] newPorts, final boolean includeUnchanged) {
    // argument node is either a contained metanode or this wfm itself
    // (latter only when updating outgoing connections)
    final List<Pair<ConnectionContainer, ConnectionContainer>> changes = new ArrayList<>();
    for (final ConnectionContainer oldConn : m_connectionsBySource.get(metaNodeID)) {
        final int oldPort = oldConn.getSourcePort();

        // first port info that describes the currently connected port, if any
        MetaPortInfo match = null;
        for (final MetaPortInfo candidate : newPorts) {
            if (candidate.getOldIndex() == oldPort) {
                match = candidate;
                break;
            }
        }
        if (match == null) {
            throw new IllegalStateException("New meta port information array " + "does not include currently connected ports, unseen connection: " + oldConn);
        }

        final int newPort = match.getNewIndex();
        if (newPort != oldPort || includeUnchanged) {
            final ConnectionContainer movedConn = new ConnectionContainer(metaNodeID, newPort, oldConn.getDest(),
                oldConn.getDestPort(), oldConn.getType(), oldConn.isFlowVariablePortConnection());
            movedConn.setUIInfo(oldConn.getUIInfo());
            changes.add(new Pair<>(oldConn, movedConn));
        }
    }
    return changes;
}
Also used : ArrayList(java.util.ArrayList) MetaPortInfo(org.knime.core.node.port.MetaPortInfo) Pair(org.knime.core.util.Pair)

Example 23 with Pair

use of org.knime.core.util.Pair in project knime-core by knime.

the class VariableToTable2NodeModel method getVariablesOfInterest.

/**
 * Collects name and type of every available flow variable that is included by the filter configuration.
 *
 * @return a new, mutable list of (variable name, variable type) pairs in the order reported by the
 *         filter; empty if no filter has been set
 */
private List<Pair<String, FlowVariable.Type>> getVariablesOfInterest() {
    final List<Pair<String, FlowVariable.Type>> result = new ArrayList<Pair<String, FlowVariable.Type>>();
    if (m_filter == null) {
        return result;
    }
    // Query the available variables exactly once so that the filter and the type lookup below are
    // guaranteed to work on the same map.
    final Map<String, FlowVariable> vars = getAvailableFlowVariables();
    final String[] names = m_filter.applyTo(vars).getIncludes();
    for (final String name : names) {
        result.add(new Pair<String, FlowVariable.Type>(name, vars.get(name).getType()));
    }
    return result;
}
Also used : PortType(org.knime.core.node.port.PortType) DataType(org.knime.core.data.DataType) ArrayList(java.util.ArrayList) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) Pair(org.knime.core.util.Pair) FlowVariable(org.knime.core.node.workflow.FlowVariable)

Example 24 with Pair

use of org.knime.core.util.Pair in project knime-core by knime.

the class DBAutoBinner method createPMMLPrepocDiscretize.

/**
 * This method creates a {@link PMMLPreprocDiscretize} object and is used in {@link DBAutoBinnerNodeModel}.
 * <p>
 * A single query fetches {@code MAX} and {@code MIN} of every included column from the incoming query;
 * the bin edges of each column are then calculated from these two values and the configured bin count.
 * If no column is included, a discretize operation without any bins is returned and the database is not
 * queried.
 *
 * @param cp {@link CredentialsProvider}
 * @param connectionSettings {@link DatabaseQueryConnectionSettings}
 * @param dataTableSpec DataTableSpec of incoming {@link BufferedDataTable}
 * @return a {@link PMMLPreprocDiscretize} object containing required parameters for binning operation
 * @throws SQLException if the database access fails
 */
public PMMLPreprocDiscretize createPMMLPrepocDiscretize(final CredentialsProvider cp, final DatabaseQueryConnectionSettings connectionSettings, final DataTableSpec dataTableSpec) throws SQLException {
    final String query = connectionSettings.getQuery();
    final StatementManipulator statementManipulator = connectionSettings.getUtility().getStatementManipulator();
    final AutoBinnerLearnSettings settings = getSettings();
    final String[] includeCols = settings.getFilterConfiguration().applyTo(dataTableSpec).getIncludes();
    if (includeCols.length == 0) {
        return createDisretizeOp(new LinkedHashMap<>());
    }

    // SELECT MAX(col) max_col, MIN(col) min_col, ... FROM (<incoming query>) T
    final StringBuilder minMaxQuery = new StringBuilder();
    minMaxQuery.append("SELECT");
    for (int i = 0; i < includeCols.length; i++) {
        final String quotedCol = statementManipulator.quoteIdentifier(includeCols[i]);
        minMaxQuery.append(" MAX(").append(quotedCol).append(") ")
            .append(statementManipulator.quoteIdentifier("max_" + includeCols[i])).append(",");
        minMaxQuery.append(" MIN(").append(quotedCol).append(") ")
            .append(statementManipulator.quoteIdentifier("min_" + includeCols[i]));
        if (i < includeCols.length - 1) {
            minMaxQuery.append(",");
        }
    }
    minMaxQuery.append(" FROM (").append(query).append(") T");

    HashMap<String, Pair<Double, Double>> maxAndMin = connectionSettings.execute(cp, conn -> {
        HashMap<String, Pair<Double, Double>> maxMinMap = new LinkedHashMap<>();
        // Both the statement and its result set are managed by try-with-resources; previously the
        // statement was created inline and never closed.
        try (java.sql.Statement statement = conn.createStatement();
                ResultSet valueSet = statement.executeQuery(minMaxQuery.toString())) {
            while (valueSet.next()) {
                for (int i = 0; i < includeCols.length; i++) {
                    final double max = valueSet.getDouble("max_" + includeCols[i]);
                    final double min = valueSet.getDouble("min_" + includeCols[i]);
                    maxMinMap.put(includeCols[i], new Pair<Double, Double>(min, max));
                }
            }
        }
        return maxMinMap;
    });

    final int number = settings.getBinCount();
    final Map<String, double[]> edgesMap = new LinkedHashMap<>();
    for (Entry<String, Pair<Double, Double>> entry : maxAndMin.entrySet()) {
        // the pair holds (min, max) of the column
        double[] edges = AutoBinner.calculateBounds(number, entry.getValue().getFirst(), entry.getValue().getSecond());
        if (settings.getIntegerBounds()) {
            edges = AutoBinner.toIntegerBoundaries(edges);
        }
        edgesMap.put(entry.getKey(), edges);
    }
    return createDisretizeOp(edgesMap);
}
Also used : StatementManipulator(org.knime.core.node.port.database.StatementManipulator) LinkedHashMap(java.util.LinkedHashMap) AutoBinnerLearnSettings(org.knime.base.node.preproc.autobinner3.AutoBinnerLearnSettings) ResultSet(java.sql.ResultSet) Pair(org.knime.core.util.Pair)

Example 25 with Pair

use of org.knime.core.util.Pair in project knime-core by knime.

the class DBPivotNodeModel method createQuery.

/**
 * Creates the SQL pivot statement for the incoming query.
 * <p>
 * First the distinct non-null values of every pivot column are fetched from the database, then the
 * configured aggregation functions are collected and finally the statement is generated by the
 * {@link StatementManipulator} of the connection.
 *
 * @param connectionSettings settings providing the incoming query and the database utility
 * @param dataTableSpec spec of the incoming query result, used to look up the pivot column specs
 * @param exec monitor used for progress reporting and cancellation checks
 * @return the pivot statement
 * @throws SQLException if the database access fails
 * @throws CanceledExecutionException if the execution has been canceled
 */
private String createQuery(final DatabaseQueryConnectionSettings connectionSettings, final DataTableSpec dataTableSpec, final ExecutionMonitor exec) throws SQLException, CanceledExecutionException {
    final StatementManipulator manipulator = connectionSettings.getUtility().getStatementManipulator();
    final String query = connectionSettings.getQuery();
    exec.setMessage("Getting pivot values.");
    ExecutionMonitor subExec = exec.createSubProgress(0.7);
    final List<String> pivotColumns = m_pivotCols.getIncludeList();
    final Map<DataColumnSpec, Set<Object>> pivotElements = connectionSettings.execute(getCredentialsProvider(), conn -> {
        int counter = 1;
        final Map<DataColumnSpec, Set<Object>> pivotMap = new LinkedHashMap<>();
        for (String pivotColumn : pivotColumns) {
            subExec.setProgress(counter / (double) pivotColumns.size(), "Fetching unique values for column " + pivotColumn + ". There are " + (pivotColumns.size() - counter) + " columns left.");
            DataColumnSpec columnSpec = dataTableSpec.getColumnSpec(pivotColumn);
            final String valueQuery = "SELECT DISTINCT " + manipulator.quoteIdentifier(pivotColumn) + " FROM (" + query + ") T";
            // Both the statement and its result set are managed by try-with-resources; previously a
            // statement was created per pivot column and never closed.
            try (java.sql.Statement statement = conn.createStatement();
                    ResultSet valueSet = statement.executeQuery(valueQuery)) {
                exec.checkCanceled();
                final Set<Object> vals = new HashSet<>();
                while (valueSet.next()) {
                    final Object dbVal = valueSet.getObject(1);
                    // NULL is not used as a pivot value
                    if (!valueSet.wasNull()) {
                        vals.add(dbVal);
                    }
                }
                pivotMap.put(columnSpec, vals);
                counter++;
            }
        }
        return pivotMap;
    });
    exec.setProgress(0.8, "Getting aggregation methods and columns.");
    List<String> groupByColumns = m_groupByCols.getIncludeList();
    // (column name, aggregation function) for every configured aggregation
    final List<Pair<String, DBAggregationFunction>> aggValues = new LinkedList<>();
    for (int i = 0; i < m_aggregationFunction2Use.size(); i++) {
        exec.checkCanceled();
        final DBColumnAggregationFunctionRow aggregationFunction = m_aggregationFunction2Use.get(i);
        String colName = aggregationFunction.getColumnSpec().getName();
        DBAggregationFunction function = aggregationFunction.getFunction();
        aggValues.add(new Pair<>(colName, function));
    }
    final ColumnNamePolicy pivotColPolicy = ColumnNamePolicy.getPolicy4Label(m_columnNamePolicy.getStringValue());
    PivotColumnNameGenerator pivotColName = new PivotColumnNameGenerator() {

        /**
         * Builds the result column name as the pivot values joined by "_", followed by "+" and the
         * aggregated column formatted according to the selected column name policy.
         */
        @Override
        public String createColumnName(final String columnName, final DBAggregationFunction function, final List<Object> pivotValues) {
            // Join with separators only between the elements: no trailing "_" has to be cut off
            // afterwards, so an empty value list yields an empty prefix instead of an exception.
            final StringBuilder joined = new StringBuilder();
            final Iterator<Object> iterator = pivotValues.iterator();
            while (iterator.hasNext()) {
                joined.append(iterator.next());
                if (iterator.hasNext()) {
                    joined.append("_");
                }
            }
            final String vals = joined.toString();
            String method = function.getColumnName();
            switch(pivotColPolicy) {
                case KEEP_ORIGINAL_NAME:
                    return vals + "+" + columnName;
                case AGGREGATION_METHOD_COLUMN_NAME:
                    return vals + "+" + method + "(" + columnName + ")";
                case COLUMN_NAME_AGGREGATION_METHOD:
                    return vals + "+" + columnName + " (" + method + ")";
                default:
                    throw new IllegalStateException("Unhandled column naming policy: " + pivotColPolicy);
            }
        }
    };
    exec.setProgress(0.9, "Creating query.");
    exec.checkCanceled();
    return manipulator.getPivotStatement(query, groupByColumns, pivotElements, aggValues, pivotColName);
}
Also used : HashSet(java.util.HashSet) ResultSet(java.sql.ResultSet) Set(java.util.Set) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) StatementManipulator(org.knime.core.node.port.database.StatementManipulator) LinkedList(java.util.LinkedList) LinkedHashMap(java.util.LinkedHashMap) ColumnNamePolicy(org.knime.base.node.preproc.groupby.ColumnNamePolicy) DataColumnSpec(org.knime.core.data.DataColumnSpec) DBColumnAggregationFunctionRow(org.knime.base.node.io.database.groupby.dialog.column.DBColumnAggregationFunctionRow) PivotColumnNameGenerator(org.knime.core.node.port.database.pivoting.PivotColumnNameGenerator) ResultSet(java.sql.ResultSet) DBAggregationFunction(org.knime.core.node.port.database.aggregation.DBAggregationFunction) DatabasePortObject(org.knime.core.node.port.database.DatabasePortObject) PortObject(org.knime.core.node.port.PortObject) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) HashSet(java.util.HashSet) Pair(org.knime.core.util.Pair)

Aggregations

Pair (org.knime.core.util.Pair)54 ArrayList (java.util.ArrayList)17 DataCell (org.knime.core.data.DataCell)14 DataType (org.knime.core.data.DataType)13 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)13 PortType (org.knime.core.node.port.PortType)13 LinkedHashMap (java.util.LinkedHashMap)11 Map (java.util.Map)10 DataColumnSpec (org.knime.core.data.DataColumnSpec)10 HashMap (java.util.HashMap)9 HashSet (java.util.HashSet)9 DataTableSpec (org.knime.core.data.DataTableSpec)9 FlowVariable (org.knime.core.node.workflow.FlowVariable)9 DataRow (org.knime.core.data.DataRow)8 StringCell (org.knime.core.data.def.StringCell)7 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)7 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)6 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)6 DefaultRow (org.knime.core.data.def.DefaultRow)6 DoubleCell (org.knime.core.data.def.DoubleCell)6