Use of org.knime.base.node.io.database.groupby.dialog.column.DBColumnAggregationFunctionRow in project knime-core by knime.
The class DBGroupByNodeModel2, method createQuery.
/**
* @param connection Connection settings of the database
* @param query Query for the input table
* @param manipulator Statement manipulator for the current database
* @return SQL query that applies a group by to the input query
*/
private String createQuery(final DatabaseQueryConnectionSettings connection, final String query, final StatementManipulator manipulator) {
final StringBuilder buf = new StringBuilder();
final String[] queries = query.split(DBReader.SQL_QUERY_SEPARATOR);
for (int i = 0; i < queries.length - 1; i++) {
buf.append(queries[i]);
buf.append(DBReader.SQL_QUERY_SEPARATOR);
}
final String selectQuery = queries[queries.length - 1];
// Build identifier for input table
String tableName = "table_" + System.identityHashCode(this);
final StringBuilder columnBuf = new StringBuilder();
final List<String> groupByCols = m_groupByCols.getIncludeList();
// Add group by columns
for (int i = 0; i < groupByCols.size(); i++) {
columnBuf.append(manipulator.quoteIdentifier(groupByCols.get(i)));
if (i + 1 < groupByCols.size() || m_aggregationFunction2Use.size() > 0 || m_addCountStar.getBooleanValue()) {
columnBuf.append(", ");
}
}
final ColumnNamePolicy columnNamePolicy = ColumnNamePolicy.getPolicy4Label(m_columnNamePolicy.getStringValue());
if (m_addCountStar.getBooleanValue()) {
columnBuf.append("COUNT(*) AS " + manipulator.quoteIdentifier(m_countStarColName.getStringValue()));
if (!m_aggregationFunction2Use.isEmpty()) {
columnBuf.append(", ");
}
}
// Add aggregated columns
for (int i = 0; i < m_aggregationFunction2Use.size(); i++) {
final DBColumnAggregationFunctionRow row = m_aggregationFunction2Use.get(i);
columnBuf.append(row.getSQLFragment(manipulator, tableName));
columnBuf.append(" AS ");
columnBuf.append(manipulator.quoteIdentifier(generateColumnName(columnNamePolicy, row)));
if (i + 1 < m_aggregationFunction2Use.size()) {
columnBuf.append(", ");
}
}
// Workaround: Google BigQuery requires the AS keyword here, whereas Oracle, for example, does not support it
final boolean appendAs = connection.getDriver().toLowerCase().contains("googlebigquery");
buf.append("SELECT " + columnBuf.toString() + " FROM (" + selectQuery + ") ");
if (appendAs) {
buf.append("AS ");
}
buf.append(manipulator.quoteIdentifier(tableName));
// build GROUP BY clause
if (!groupByCols.isEmpty()) {
buf.append(" GROUP BY ");
}
for (int i = 0; i < groupByCols.size(); i++) {
buf.append(manipulator.quoteIdentifier(groupByCols.get(i)));
if (i + 1 < groupByCols.size()) {
buf.append(", ");
}
}
return buf.toString();
}
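For orientation, the following is a minimal sketch (plain Java, not KNIME API) of the statement shape createQuery assembles, assuming one group column country, the COUNT(*) option named count, and a single SUM aggregation on sales; identifier quoting, the generated aggregation column name and the table alias (derived from System.identityHashCode(this) in the real code) are placeholders and depend on the StatementManipulator and the configured ColumnNamePolicy.
// Sketch only: all names below are illustrative assumptions, not values produced by a real node.
public final class GroupByQueryShapeExample {
    public static void main(final String[] args) {
        final String inputQuery = "SELECT * FROM sales_table"; // hypothetical input query
        final String tableAlias = "table_12345";               // placeholder for "table_" + identity hash
        final String sql = "SELECT country, COUNT(*) AS count, SUM(sales) AS \"SUM(sales)\""
            + " FROM (" + inputQuery + ") " + tableAlias
            + " GROUP BY country";
        System.out.println(sql);
    }
}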
Use of org.knime.base.node.io.database.groupby.dialog.column.DBColumnAggregationFunctionRow in project knime-core by knime.
The class DBGroupByNodeModel2, method createOutSpec.
/**
* @param inSpec Spec of the input table
* @param settings Connection settings of the database
* @param query Query for the input table
* @param manipulator Statement manipulator for the current database
* @param ignoreExceptions <code>true</code> if exceptions while retrieving the spec from the database should only be logged and the spec guessed instead
* @return Spec of the output table
* @throws InvalidSettingsException if settings do not match the input specification
*/
private DataTableSpec createOutSpec(final DataTableSpec inSpec, final DatabaseConnectionSettings settings, final String query, final StatementManipulator manipulator, final boolean ignoreExceptions) throws InvalidSettingsException {
// Try to get the spec from the database
try {
DatabaseQueryConnectionSettings querySettings = new DatabaseQueryConnectionSettings(settings, query);
DBReader conn = querySettings.getUtility().getReader(querySettings);
return conn.getDataTableSpec(getCredentialsProvider());
} catch (SQLException e) {
NodeLogger.getLogger(getClass()).info("Could not determine table spec from database, trying to guess now", e);
if (!ignoreExceptions) {
throw new InvalidSettingsException("Error in automatically build sql statement: " + e.getMessage());
}
// Otherwise guess spec
}
final List<DataColumnSpec> colSpecs = new ArrayList<>();
// Add all group by columns
for (String col : m_groupByCols.getIncludeList()) {
final DataColumnSpec columnSpec = inSpec.getColumnSpec(col);
if (columnSpec == null) {
throw new InvalidSettingsException("Group column '" + col + "' not found in input table");
}
colSpecs.add(columnSpec);
}
if (m_addCountStar.getBooleanValue()) {
colSpecs.add(new DataColumnSpecCreator(manipulator.getValidColumnName(m_countStarColName.getStringValue()), LongCell.TYPE).createSpec());
}
final ColumnNamePolicy columnNamePolicy = ColumnNamePolicy.getPolicy4Label(m_columnNamePolicy.getStringValue());
// Add aggregated columns
for (int i = 0; i < m_aggregationFunction2Use.size(); i++) {
final DBColumnAggregationFunctionRow row = m_aggregationFunction2Use.get(i);
final String col = row.getColumnSpec().getName();
final String methodId = row.getFunction().getId();
if (inSpec.getColumnSpec(col) == null) {
throw new InvalidSettingsException("Column '" + col + "' for aggregation function " + row.getFunction().getLabel() + " does not exist");
}
final DatabaseUtility databaseUtility = settings.getUtility();
final DBAggregationFunction function = databaseUtility.getAggregationFunction(methodId);
// Get type of column after aggregation
final DataType type = function.getType(inSpec.getColumnSpec(col).getType());
colSpecs.add(new DataColumnSpecCreator(manipulator.getValidColumnName(generateColumnName(columnNamePolicy, row)), type).createSpec());
}
return new DataTableSpec(colSpecs.toArray(new DataColumnSpec[colSpecs.size()]));
}
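If the spec cannot be retrieved from the database and exceptions are ignored, the guessed spec mirrors the column order of the generated query: group columns first, then the optional COUNT(*) column, then one column per aggregation. A hypothetical sketch, assuming a string group column country, a COUNT(*) column named count, and an AVG aggregation on the integer column sales; in the node the aggregation column name and type come from the naming policy and DBAggregationFunction#getType.
import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.def.DoubleCell;
import org.knime.core.data.def.LongCell;
import org.knime.core.data.def.StringCell;

final class GuessedSpecExample {
    // Column names and types below are illustrative assumptions, not values taken from a real node.
    static DataTableSpec guessedSpec() {
        return new DataTableSpec(
            new DataColumnSpecCreator("country", StringCell.TYPE).createSpec(),    // group column
            new DataColumnSpecCreator("count", LongCell.TYPE).createSpec(),        // COUNT(*) column
            new DataColumnSpecCreator("AVG(sales)", DoubleCell.TYPE).createSpec()  // aggregated column
        );
    }
}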
Use of org.knime.base.node.io.database.groupby.dialog.column.DBColumnAggregationFunctionRow in project knime-core by knime.
The class DBGroupByNodeModel2, method configure.
/**
* {@inheritDoc}
*/
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
final DatabasePortObjectSpec dbSpec = (DatabasePortObjectSpec) inSpecs[0];
final DataTableSpec tableSpec = dbSpec.getDataTableSpec();
final DatabaseQueryConnectionSettings connection = dbSpec.getConnectionSettings(null);
final String dbIdentifier = connection.getDatabaseIdentifier();
final List<DBColumnAggregationFunctionRow> columnFunctions = DBColumnAggregationFunctionRow.loadFunctions(m_settings, DBGroupByNodeModel2.CFG_AGGREGATION_FUNCTIONS, dbIdentifier, tableSpec);
final ArrayList<DBColumnAggregationFunctionRow> invalidColAggrs = new ArrayList<>(1);
final Set<String> usedColNames = new HashSet<>(tableSpec.getNumColumns());
usedColNames.addAll(m_groupByCols.getIncludeList());
m_aggregationFunction2Use.clear();
for (DBColumnAggregationFunctionRow row : columnFunctions) {
final DataColumnSpec columnSpec = row.getColumnSpec();
final DataColumnSpec inputSpec = tableSpec.getColumnSpec(columnSpec.getName());
final AggregationFunction function = row.getFunction();
if (inputSpec == null || !inputSpec.getType().equals(columnSpec.getType())) {
invalidColAggrs.add(row);
continue;
}
if (function instanceof InvalidAggregationFunction) {
throw new InvalidSettingsException(((InvalidAggregationFunction) function).getErrorMessage());
}
if (function.hasOptionalSettings()) {
try {
function.configure(tableSpec);
} catch (InvalidSettingsException e) {
throw new InvalidSettingsException("Exception in aggregation function " + function.getLabel() + " of column " + row.getColumnSpec().getName() + ": " + e.getMessage());
}
}
usedColNames.add(row.getColumnSpec().getName());
m_aggregationFunction2Use.add(row);
}
final List<DBPatternAggregationFunctionRow> patternFunctions = DBPatternAggregationFunctionRow.loadFunctions(m_settings, CFG_PATTERN_AGGREGATION_FUNCTIONS, dbIdentifier, tableSpec);
if (tableSpec.getNumColumns() > usedColNames.size() && !patternFunctions.isEmpty()) {
for (final DataColumnSpec spec : tableSpec) {
if (!usedColNames.contains(spec.getName())) {
for (final DBPatternAggregationFunctionRow patternFunction : patternFunctions) {
final Pattern pattern = patternFunction.getRegexPattern();
final DBAggregationFunction function = patternFunction.getFunction();
if (pattern != null && pattern.matcher(spec.getName()).matches() && function.isCompatible(spec.getType())) {
final DBColumnAggregationFunctionRow row = new DBColumnAggregationFunctionRow(spec, patternFunction.getFunction());
m_aggregationFunction2Use.add(row);
usedColNames.add(spec.getName());
}
}
}
}
}
final List<DBDataTypeAggregationFunctionRow> typeFunctions = DBDataTypeAggregationFunctionRow.loadFunctions(m_settings, CFG_TYPE_AGGREGATION_FUNCTIONS, dbIdentifier, tableSpec);
// check if some columns are left
if (tableSpec.getNumColumns() > usedColNames.size() && !typeFunctions.isEmpty()) {
for (final DataColumnSpec spec : tableSpec) {
if (!usedColNames.contains(spec.getName())) {
final DataType dataType = spec.getType();
for (final DBDataTypeAggregationFunctionRow typeAggregator : typeFunctions) {
if (typeAggregator.isCompatibleType(dataType)) {
final DBColumnAggregationFunctionRow row = new DBColumnAggregationFunctionRow(spec, typeAggregator.getFunction());
m_aggregationFunction2Use.add(row);
usedColNames.add(spec.getName());
}
}
}
}
}
if (m_groupByCols.getIncludeList().isEmpty() && m_aggregationFunction2Use.isEmpty() && !m_addCountStar.getBooleanValue()) {
throw new InvalidSettingsException("Please select at least one group or aggregation function or the " + "COUNT(*) option.");
}
if (!invalidColAggrs.isEmpty()) {
setWarningMessage(invalidColAggrs.size() + " aggregation functions ignored due to incompatible columns.");
}
return new PortObjectSpec[] { createDbOutSpec(dbSpec, true) };
}
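The resolution order in configure is worth calling out: explicitly configured column aggregations are applied first, then pattern-based rows claim any remaining column whose name matches and whose type is compatible, and finally data-type-based rows claim whatever is still unused. A plain-Java sketch of that precedence follows (no KNIME API; the predicates stand in for the pattern and type checks, and all names are illustrative).
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;

final class AggregationResolutionSketch {
    /** Returns a human-readable plan showing which kind of rule claims which column. */
    static List<String> resolve(final List<String> allColumns,
                                final Set<String> explicitlyConfigured,
                                final Predicate<String> patternRule,
                                final Predicate<String> typeRule) {
        final Set<String> used = new LinkedHashSet<>(explicitlyConfigured);
        final List<String> plan = new ArrayList<>();
        explicitlyConfigured.forEach(c -> plan.add(c + " -> explicit aggregation"));
        for (final String c : allColumns) {                 // pattern-based rows come second
            if (!used.contains(c) && patternRule.test(c)) {
                plan.add(c + " -> pattern-based aggregation");
                used.add(c);
            }
        }
        for (final String c : allColumns) {                 // data-type-based rows come last
            if (!used.contains(c) && typeRule.test(c)) {
                plan.add(c + " -> type-based aggregation");
                used.add(c);
            }
        }
        return plan;
    }
}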
Use of org.knime.base.node.io.database.groupby.dialog.column.DBColumnAggregationFunctionRow in project knime-core by knime.
The class DBPivotNodeModel, method configure.
/**
* {@inheritDoc}
*/
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
final DatabasePortObjectSpec dbSpec = (DatabasePortObjectSpec) inSpecs[0];
final DataTableSpec tableSpec = dbSpec.getDataTableSpec();
final DatabaseQueryConnectionSettings connection = dbSpec.getConnectionSettings(getCredentialsProvider());
final String dbIdentifier = connection.getDatabaseIdentifier();
final List<DBColumnAggregationFunctionRow> columnFunctions = DBColumnAggregationFunctionRow.loadFunctions(m_settings, DBPivotNodeModel.CFG_AGGREGATION_FUNCTIONS, dbIdentifier, tableSpec);
final ArrayList<DBColumnAggregationFunctionRow> invalidColAggrs = new ArrayList<>(1);
final Set<String> usedColNames = new HashSet<>(tableSpec.getNumColumns());
usedColNames.addAll(m_groupByCols.getIncludeList());
usedColNames.addAll(m_pivotCols.getIncludeList());
m_aggregationFunction2Use.clear();
for (DBColumnAggregationFunctionRow row : columnFunctions) {
final DataColumnSpec columnSpec = row.getColumnSpec();
final DataColumnSpec inputSpec = tableSpec.getColumnSpec(columnSpec.getName());
final AggregationFunction function = row.getFunction();
if (inputSpec == null || !inputSpec.getType().equals(columnSpec.getType())) {
invalidColAggrs.add(row);
continue;
}
if (function instanceof InvalidAggregationFunction) {
throw new InvalidSettingsException(((InvalidAggregationFunction) function).getErrorMessage());
}
if (function.hasOptionalSettings()) {
try {
function.configure(tableSpec);
} catch (InvalidSettingsException e) {
throw new InvalidSettingsException("Wrong aggregation function configuration '" + function.getLabel() + "' of column '" + row.getColumnSpec().getName() + "': " + e.getMessage(), e);
}
}
usedColNames.add(row.getColumnSpec().getName());
m_aggregationFunction2Use.add(row);
}
if (m_aggregationFunction2Use.isEmpty()) {
throw new InvalidSettingsException("No aggregation columns selected.");
}
if (m_groupByCols.getIncludeList().isEmpty()) {
setWarningMessage("No grouping column included. Aggregate complete table");
}
if (m_pivotCols.getIncludeList().isEmpty()) {
throw new InvalidSettingsException("No pivot columns selected.");
}
if (!invalidColAggrs.isEmpty()) {
setWarningMessage(invalidColAggrs.size() + " aggregation functions ignored due to incompatible columns.");
}
final DatabasePortObjectSpec resultSpec;
if (connection.getRetrieveMetadataInConfigure()) {
try {
resultSpec = createDbOutSpec(dbSpec, new ExecutionMonitor());
} catch (CanceledExecutionException e) {
throw new InvalidSettingsException(e.getMessage());
}
} else {
resultSpec = null;
}
return new PortObjectSpec[] { resultSpec };
}
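A compact restatement of the checks above, using plain Java in place of KNIME's InvalidSettingsException and setWarningMessage: at least one aggregation and at least one pivot column are required, while missing group columns only produce a warning.
import java.util.List;

final class PivotSettingsCheckSketch {
    static void validate(final List<?> aggregations, final List<String> pivotCols, final List<String> groupCols) {
        if (aggregations.isEmpty()) {
            throw new IllegalArgumentException("No aggregation columns selected.");
        }
        if (pivotCols.isEmpty()) {
            throw new IllegalArgumentException("No pivot columns selected.");
        }
        if (groupCols.isEmpty()) {
            // the node only issues setWarningMessage(...) in this case
            System.out.println("Warning: no grouping column included; aggregating the complete table.");
        }
    }
}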
Use of org.knime.base.node.io.database.groupby.dialog.column.DBColumnAggregationFunctionRow in project knime-core by knime.
The class DBPivotNodeModel, method createQuery.
private String createQuery(final DatabaseQueryConnectionSettings connectionSettings, final DataTableSpec dataTableSpec, final ExecutionMonitor exec) throws SQLException, CanceledExecutionException {
final StatementManipulator manipulator = connectionSettings.getUtility().getStatementManipulator();
final String query = connectionSettings.getQuery();
exec.setMessage("Getting pivot values.");
ExecutionMonitor subExec = exec.createSubProgress(0.7);
final List<String> pivotColumns = m_pivotCols.getIncludeList();
final Map<DataColumnSpec, Set<Object>> pivotElements = connectionSettings.execute(getCredentialsProvider(), conn -> {
int counter = 1;
final Map<DataColumnSpec, Set<Object>> pivotMap = new LinkedHashMap<>();
for (String pivotColumn : pivotColumns) {
subExec.setProgress(counter / (double) pivotColumns.size(), "Fetching unique values for column " + pivotColumn + ". There are " + (pivotColumns.size() - counter) + " columns left.");
DataColumnSpec columnSpec = dataTableSpec.getColumnSpec(pivotColumn);
final String valueQuery = "SELECT DISTINCT " + manipulator.quoteIdentifier(pivotColumn) + " FROM (" + query + ") T";
try (ResultSet valueSet = conn.createStatement().executeQuery(valueQuery)) {
exec.checkCanceled();
final Set<Object> vals = new HashSet<>();
while (valueSet.next()) {
final Object dbVal = valueSet.getObject(1);
if (!valueSet.wasNull()) {
vals.add(dbVal);
}
}
pivotMap.put(columnSpec, vals);
counter++;
}
}
return pivotMap;
});
exec.setProgress(0.8, "Getting aggregation methods and columns.");
List<String> groupByColumns = m_groupByCols.getIncludeList();
final List<Pair<String, DBAggregationFunction>> aggValues = new LinkedList<>();
for (int i = 0; i < m_aggregationFunction2Use.size(); i++) {
exec.checkCanceled();
final DBColumnAggregationFunctionRow aggregationFunction = m_aggregationFunction2Use.get(i);
String colName = aggregationFunction.getColumnSpec().getName();
DBAggregationFunction function = aggregationFunction.getFunction();
aggValues.add(new Pair<>(colName, function));
}
final ColumnNamePolicy pivotColPolicy = ColumnNamePolicy.getPolicy4Label(m_columnNamePolicy.getStringValue());
PivotColumnNameGenerator pivotColName = new PivotColumnNameGenerator() {
@Override
public String createColumnName(final String columnName, final DBAggregationFunction function, final List<Object> pivotValues) {
String vals = "";
Iterator<Object> iterator = pivotValues.iterator();
while (iterator.hasNext()) {
vals = vals + iterator.next() + "_";
}
vals = vals.substring(0, vals.length() - 1);
String method = function.getColumnName();
switch (pivotColPolicy) {
case KEEP_ORIGINAL_NAME:
return vals + "+" + columnName;
case AGGREGATION_METHOD_COLUMN_NAME:
return vals + "+" + method + "(" + columnName + ")";
case COLUMN_NAME_AGGREGATION_METHOD:
return vals + "+" + columnName + " (" + method + ")";
default:
throw new IllegalStateException("Unhandled column naming policy: " + pivotColPoliciy);
}
}
};
exec.setProgress(0.9, "Creating query.");
exec.checkCanceled();
return manipulator.getPivotStatement(query, groupByColumns, pivotElements, aggValues, pivotColName);
}
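The generated pivot column names can be previewed from the logic above: the pivot values are joined with underscores and then combined with the aggregated column name and the aggregation method according to the naming policy. Below is a standalone re-implementation for illustration only; the enum and method names are not part of the KNIME API.
import java.util.List;
import java.util.stream.Collectors;

final class PivotNameSketch {
    enum Policy { KEEP_ORIGINAL_NAME, AGGREGATION_METHOD_COLUMN_NAME, COLUMN_NAME_AGGREGATION_METHOD }

    static String name(final List<Object> pivotValues, final String column, final String method, final Policy policy) {
        final String vals = pivotValues.stream().map(Object::toString).collect(Collectors.joining("_"));
        switch (policy) {
            case KEEP_ORIGINAL_NAME:
                return vals + "+" + column;                       // e.g. DE_2020+sales
            case AGGREGATION_METHOD_COLUMN_NAME:
                return vals + "+" + method + "(" + column + ")";  // e.g. DE_2020+SUM(sales)
            case COLUMN_NAME_AGGREGATION_METHOD:
                return vals + "+" + column + " (" + method + ")"; // e.g. DE_2020+sales (SUM)
            default:
                throw new IllegalStateException("Unhandled column naming policy: " + policy);
        }
    }
}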