use of org.knime.core.util.Pair in project knime-core by knime.
the class PivotNodeModel method execute.
/**
 * {@inheritDoc}
 * <p>
 * Reads the first input table once, aggregating per (group value, pivot value) combination, and then
 * writes one output row per distinct group value with one cell per distinct pivot value.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    final DataTableSpec inSpec = input.getDataTableSpec();
    final int groupIdx = inSpec.findColumnIndex(m_group.getStringValue());
    final int pivotIdx = inSpec.findColumnIndex(m_pivot.getStringValue());
    // a negative aggregation index means no aggregation column is used; occurrences are counted instead
    final int aggIdx;
    if (m_makeAgg.getStringValue().equals(PivotNodeDialogPane.MAKE_AGGREGATION[1])) {
        aggIdx = inSpec.findColumnIndex(m_agg.getStringValue());
    } else {
        aggIdx = -1;
    }
    final PivotAggregationMethod aggMethod;
    if (aggIdx >= 0) {
        aggMethod = PivotAggregationMethod.METHODS.get(m_aggMethod.getStringValue());
    } else {
        aggMethod = PivotAggregationMethod.COUNT;
    }

    // aggregation state keyed by the (group value, pivot value) pair
    final Map<Pair<String, String>, Double[]> aggregates = new LinkedHashMap<>();

    // pivot values in insertion order, pre-filled from the column domain when it lists its values
    final Set<String> pivotValues = new LinkedHashSet<>();
    final DataColumnSpec pivotSpec = inSpec.getColumnSpec(pivotIdx);
    if (pivotSpec.getDomain().hasValues()) {
        for (final DataCell domainValue : pivotSpec.getDomain().getValues()) {
            pivotValues.add(domainValue.toString());
        }
    }

    // group values in order of first appearance
    final Set<String> groupValues = new LinkedHashSet<>();
    // group row key -> keys of the input rows that fell into the group (only filled when hiliting is enabled)
    final LinkedHashMap<RowKey, Set<RowKey>> hiliteMapping = new LinkedHashMap<>();

    final double totalRows = input.getRowCount();
    int processedRows = 0;
    final ExecutionContext aggregationExec = exec.createSubExecutionContext(0.75);
    // find all (group, pivot) pairs and aggregate the values of each of them
    for (final DataRow row : input) {
        aggregationExec.checkCanceled();
        processedRows++;
        aggregationExec.setProgress(processedRows / totalRows, "Aggregating row: \"" + row.getKey().getString()
            + "\" (" + processedRows + "\\" + (int) totalRows + ")");

        // the group value is registered even if the row is skipped below
        final String groupValue = row.getCell(groupIdx).toString();
        groupValues.add(groupValue);

        final DataCell pivotCell = row.getCell(pivotIdx);
        if (pivotCell.isMissing() && m_ignoreMissValues.getBooleanValue()) {
            // missing pivot values are to be ignored
            continue;
        }
        final String pivotValue = pivotCell.toString();
        pivotValues.add(pivotValue);

        final Pair<String, String> key = new Pair<>(groupValue, pivotValue);
        Double[] state = aggregates.get(key);
        if (state == null) {
            state = aggMethod.init();
            aggregates.put(key, state);
        }
        if (aggIdx >= 0) {
            aggMethod.compute(state, row.getCell(aggIdx));
        } else {
            aggMethod.compute(state, null);
        }

        if (m_hiliting.getBooleanValue()) {
            hiliteMapping.computeIfAbsent(new RowKey(groupValue), k -> new LinkedHashSet<>()).add(row.getKey());
        }
    }

    final DataTableSpec outSpec = initSpec(pivotValues);
    // receives the final pivoting table
    final BufferedDataContainer container = exec.createDataContainer(outSpec);
    final double totalGroups = groupValues.size();
    int processedGroups = 0;
    final ExecutionContext outputExec = exec.createSubExecutionContext(0.25);
    for (final String groupValue : groupValues) {
        outputExec.checkCanceled();
        processedGroups++;
        outputExec.setProgress(processedGroups / totalGroups, "Computing aggregation of group \"" + groupValue
            + "\" (" + processedGroups + "\\" + (int) totalGroups + ")");

        // one result cell per pivot value, in pivot order
        final DataCell[] cells = new DataCell[pivotValues.size()];
        int column = 0;
        for (final String pivotValue : pivotValues) {
            final Double[] state = aggregates.get(new Pair<String, String>(groupValue, pivotValue));
            cells[column++] = aggMethod.done(state);
        }
        // the group value doubles as the row id of the output row
        container.addRowToTable(new DefaultRow(groupValue, cells));
    }
    container.close();

    if (m_hiliting.getBooleanValue()) {
        m_translator.setMapper(new DefaultHiLiteMapper(hiliteMapping));
    }
    return new BufferedDataTable[] { container.getTable() };
}
use of org.knime.core.util.Pair in project knime-core by knime.
the class Workflow method changeSourcePortsForMetaNode.
/**
 * Computes the replacement connections for all connections leaving the given metanode after its ports
 * have been re-indexed. Nothing is modified here; the method only builds the list of replacements.
 *
 * @param metaNodeID ID of the metanode
 * @param newPorts The new ports
 * @param includeUnchanged If connections that will not change should be included
 * @return List of pairs of original (first) and changed (second) connections
 * @throws IllegalStateException if a currently connected source port has no entry in {@code newPorts}
 */
List<Pair<ConnectionContainer, ConnectionContainer>> changeSourcePortsForMetaNode(final NodeID metaNodeID, final MetaPortInfo[] newPorts, final boolean includeUnchanged) {
    // argument node is either a contained metanode or this wfm itself
    // (latter only when updating outgoing connections)
    final List<Pair<ConnectionContainer, ConnectionContainer>> changes = new ArrayList<>();
    for (final ConnectionContainer oldConn : m_connectionsBySource.get(metaNodeID)) {
        final int oldPort = oldConn.getSourcePort();

        // look up the port description whose old index matches this connection's source port
        MetaPortInfo match = null;
        for (final MetaPortInfo candidate : newPorts) {
            if (candidate.getOldIndex() == oldPort) {
                match = candidate;
                break;
            }
        }
        if (match == null) {
            throw new IllegalStateException("New meta port information array "
                + "does not include currently connected ports, unseen connection: " + oldConn);
        }

        final int newPort = match.getNewIndex();
        if (includeUnchanged || newPort != oldPort) {
            final ConnectionContainer replacement = new ConnectionContainer(metaNodeID, newPort,
                oldConn.getDest(), oldConn.getDestPort(), oldConn.getType(), oldConn.isFlowVariablePortConnection());
            replacement.setUIInfo(oldConn.getUIInfo());
            changes.add(new Pair<ConnectionContainer, ConnectionContainer>(oldConn, replacement));
        }
    }
    return changes;
}
use of org.knime.core.util.Pair in project knime-core by knime.
the class VariableToTable2NodeModel method getVariablesOfInterest.
/**
 * Collects name and type of every available flow variable that the configured filter includes.
 *
 * @return the included variables as (name, type) pairs; an empty list if no filter is set
 */
private List<Pair<String, FlowVariable.Type>> getVariablesOfInterest() {
    final List<Pair<String, FlowVariable.Type>> selected = new ArrayList<>();
    if (m_filter == null) {
        return selected;
    }
    final String[] includedNames = m_filter.applyTo(getAvailableFlowVariables()).getIncludes();
    final Map<String, FlowVariable> available = getAvailableFlowVariables();
    for (int i = 0; i < includedNames.length; i++) {
        final String varName = includedNames[i];
        final FlowVariable.Type varType = available.get(varName).getType();
        selected.add(new Pair<String, FlowVariable.Type>(varName, varType));
    }
    return selected;
}
use of org.knime.core.util.Pair in project knime-core by knime.
the class DBAutoBinner method createPMMLPrepocDiscretize.
/**
 * This method creates a {@link PMMLPreprocDiscretize} object and is used in {@link DBAutoBinnerNodeModel}.
 * <p>
 * A single {@code SELECT MAX(..), MIN(..) ...} query over the incoming query determines the value range of
 * every included column; the bin edges are then calculated from these ranges and the configured bin count.
 * If the column filter includes no column, no query is executed.
 *
 * @param cp {@link CredentialsProvider}
 * @param connectionSettings {@link DatabaseQueryConnectionSettings}
 * @param dataTableSpec DataTableSpec of incoming {@link BufferedDataTable}
 * @return a {@link PMMLPreprocDiscretize} object containing required parameters for binning operation
 * @throws SQLException if a database access error occurs while determining the column ranges
 */
public PMMLPreprocDiscretize createPMMLPrepocDiscretize(final CredentialsProvider cp, final DatabaseQueryConnectionSettings connectionSettings, final DataTableSpec dataTableSpec) throws SQLException {
    final String query = connectionSettings.getQuery();
    final StatementManipulator statementManipulator = connectionSettings.getUtility().getStatementManipulator();
    final AutoBinnerLearnSettings settings = getSettings();
    final String[] includeCols = settings.getFilterConfiguration().applyTo(dataTableSpec).getIncludes();
    if (includeCols.length == 0) {
        return createDisretizeOp(new LinkedHashMap<>());
    }

    // SELECT MAX("c") "max_c", MIN("c") "min_c", ... FROM (<query>) T
    final StringBuilder minMaxQuery = new StringBuilder("SELECT");
    for (int i = 0; i < includeCols.length; i++) {
        final String column = includeCols[i];
        final String quotedColumn = statementManipulator.quoteIdentifier(column);
        minMaxQuery.append(" MAX(").append(quotedColumn).append(") ")
            .append(statementManipulator.quoteIdentifier("max_" + column)).append(',');
        minMaxQuery.append(" MIN(").append(quotedColumn).append(") ")
            .append(statementManipulator.quoteIdentifier("min_" + column));
        if (i < includeCols.length - 1) {
            minMaxQuery.append(',');
        }
    }
    minMaxQuery.append(" FROM (").append(query).append(") T");
    final String minMaxSql = minMaxQuery.toString();

    final Map<String, Pair<Double, Double>> maxAndMin = connectionSettings.execute(cp, conn -> {
        final Map<String, Pair<Double, Double>> maxMinMap = new LinkedHashMap<>();
        // the statement is part of the try-with-resources so that it is closed together with its
        // result set instead of being left open on the connection
        try (java.sql.Statement statement = conn.createStatement();
                ResultSet valueSet = statement.executeQuery(minMaxSql)) {
            while (valueSet.next()) {
                for (final String column : includeCols) {
                    // NOTE: ResultSet#getDouble yields 0 for SQL NULL, so a column without any
                    // non-null value ends up with the range [0, 0]
                    final double max = valueSet.getDouble("max_" + column);
                    final double min = valueSet.getDouble("min_" + column);
                    // first = minimum, second = maximum
                    maxMinMap.put(column, new Pair<Double, Double>(min, max));
                }
            }
        }
        return maxMinMap;
    });

    final int number = settings.getBinCount();
    final Map<String, double[]> edgesMap = new LinkedHashMap<>();
    for (final Entry<String, Pair<Double, Double>> entry : maxAndMin.entrySet()) {
        final Pair<Double, Double> range = entry.getValue();
        double[] edges = AutoBinner.calculateBounds(number, range.getFirst(), range.getSecond());
        if (settings.getIntegerBounds()) {
            edges = AutoBinner.toIntegerBoundaries(edges);
        }
        edgesMap.put(entry.getKey(), edges);
    }
    return createDisretizeOp(edgesMap);
}
use of org.knime.core.util.Pair in project knime-core by knime.
the class DBPivotNodeModel method createQuery.
/**
 * Builds the SQL pivot statement for the incoming query.
 * <p>
 * For every configured pivot column the distinct non-null values are fetched from the database first;
 * together with the group-by columns and the configured aggregation functions they are handed to the
 * {@link StatementManipulator} which creates the final statement.
 *
 * @param connectionSettings connection and incoming query to pivot
 * @param dataTableSpec spec of the incoming table, used to look up the specs of the pivot columns
 * @param exec monitor for progress reporting and cancellation checks
 * @return the pivot statement as created by the statement manipulator
 * @throws SQLException if a database access error occurs while fetching the pivot values
 * @throws CanceledExecutionException if the execution has been canceled
 */
private String createQuery(final DatabaseQueryConnectionSettings connectionSettings, final DataTableSpec dataTableSpec, final ExecutionMonitor exec) throws SQLException, CanceledExecutionException {
    final StatementManipulator manipulator = connectionSettings.getUtility().getStatementManipulator();
    final String query = connectionSettings.getQuery();
    exec.setMessage("Getting pivot values.");
    final ExecutionMonitor subExec = exec.createSubProgress(0.7);
    final List<String> pivotColumns = m_pivotCols.getIncludeList();
    final Map<DataColumnSpec, Set<Object>> pivotElements = connectionSettings.execute(getCredentialsProvider(), conn -> {
        int counter = 1;
        final Map<DataColumnSpec, Set<Object>> pivotMap = new LinkedHashMap<>();
        for (final String pivotColumn : pivotColumns) {
            subExec.setProgress(counter / (double) pivotColumns.size(), "Fetching unique values for column " + pivotColumn + ". There are " + (pivotColumns.size() - counter) + " columns left.");
            final DataColumnSpec columnSpec = dataTableSpec.getColumnSpec(pivotColumn);
            final String valueQuery = "SELECT DISTINCT " + manipulator.quoteIdentifier(pivotColumn) + " FROM (" + query + ") T";
            // one statement per pivot column: keep it in the try-with-resources so that it is closed
            // after its result set instead of piling up on the connection
            try (java.sql.Statement statement = conn.createStatement();
                    ResultSet valueSet = statement.executeQuery(valueQuery)) {
                exec.checkCanceled();
                final Set<Object> vals = new HashSet<>();
                while (valueSet.next()) {
                    final Object dbVal = valueSet.getObject(1);
                    // SQL NULL is not a pivot value
                    if (!valueSet.wasNull()) {
                        vals.add(dbVal);
                    }
                }
                pivotMap.put(columnSpec, vals);
                counter++;
            }
        }
        return pivotMap;
    });

    exec.setProgress(0.8, "Getting aggregation methods and columns.");
    final List<String> groupByColumns = m_groupByCols.getIncludeList();
    // (column name, aggregation function) for every configured aggregation
    final List<Pair<String, DBAggregationFunction>> aggValues = new LinkedList<>();
    for (int i = 0; i < m_aggregationFunction2Use.size(); i++) {
        exec.checkCanceled();
        final DBColumnAggregationFunctionRow aggregationFunction = m_aggregationFunction2Use.get(i);
        final String colName = aggregationFunction.getColumnSpec().getName();
        final DBAggregationFunction function = aggregationFunction.getFunction();
        aggValues.add(new Pair<>(colName, function));
    }

    final ColumnNamePolicy pivotColPoliciy = ColumnNamePolicy.getPolicy4Label(m_columnNamePolicy.getStringValue());
    final PivotColumnNameGenerator pivotColName = new PivotColumnNameGenerator() {
        /**
         * Creates the result column name as {@code <pivot values joined by '_'>+<name part>} where the
         * name part depends on the configured column naming policy.
         */
        @Override
        public String createColumnName(final String columnName, final DBAggregationFunction function, final List<Object> pivotValues) {
            // join with '_' without a trailing separator; unlike cutting off the last character
            // afterwards this also works for an empty value list
            final StringBuilder joined = new StringBuilder();
            final Iterator<Object> iterator = pivotValues.iterator();
            while (iterator.hasNext()) {
                joined.append(iterator.next());
                if (iterator.hasNext()) {
                    joined.append('_');
                }
            }
            final String vals = joined.toString();
            final String method = function.getColumnName();
            switch (pivotColPoliciy) {
                case KEEP_ORIGINAL_NAME:
                    return vals + "+" + columnName;
                case AGGREGATION_METHOD_COLUMN_NAME:
                    return vals + "+" + method + "(" + columnName + ")";
                case COLUMN_NAME_AGGREGATION_METHOD:
                    return vals + "+" + columnName + " (" + method + ")";
                default:
                    throw new IllegalStateException("Unhandled column naming policy: " + pivotColPoliciy);
            }
        }
    };

    exec.setProgress(0.9, "Creating query.");
    exec.checkCanceled();
    return manipulator.getPivotStatement(query, groupByColumns, pivotElements, aggValues, pivotColName);
}
Aggregations