Use of org.apache.druid.segment.VirtualColumn in project hive by apache.
From the class DruidStorageHandlerUtils, the method extractColName:
@Nullable
public static String extractColName(ExprNodeDesc expr, List<VirtualColumn> virtualColumns) {
  if (!druidSupportedTypeInfos.contains(expr.getTypeInfo())) {
    // We cannot pass the bloom filter to druid since bloom filter tests for exact object bytes.
    return null;
  }
  if (expr instanceof ExprNodeColumnDesc) {
    return ((ExprNodeColumnDesc) expr).getColumn();
  }
  ExprNodeGenericFuncDesc funcDesc = null;
  if (expr instanceof ExprNodeGenericFuncDesc) {
    funcDesc = (ExprNodeGenericFuncDesc) expr;
  }
  if (null == funcDesc) {
    return null;
  }
  GenericUDF udf = funcDesc.getGenericUDF();
  // Bail out if it is not a simple cast expression: exactly one child that is a plain column reference.
  if (funcDesc.getChildren().size() != 1 || !(funcDesc.getChildren().get(0) instanceof ExprNodeColumnDesc)) {
    return null;
  }
  String columnName = ((ExprNodeColumnDesc) (funcDesc.getChildren().get(0))).getColumn();
  ValueType targetType = null;
  if (udf instanceof GenericUDFBridge) {
    Class<? extends UDF> udfClass = ((GenericUDFBridge) udf).getUdfClass();
    if (udfClass.equals(UDFToDouble.class)) {
      targetType = ValueType.DOUBLE;
    } else if (udfClass.equals(UDFToFloat.class)) {
      targetType = ValueType.FLOAT;
    } else if (udfClass.equals(UDFToLong.class)) {
      targetType = ValueType.LONG;
    }
  } else if (udf instanceof GenericUDFToString) {
    targetType = ValueType.STRING;
  }
  if (targetType == null) {
    return null;
  }
  String virtualColumnExpr = DruidQuery.format("CAST(%s, '%s')", columnName, targetType.toString());
  for (VirtualColumn column : virtualColumns) {
    if (column instanceof ExpressionVirtualColumn
        && ((ExpressionVirtualColumn) column).getExpression().equals(virtualColumnExpr)) {
      // Found an existing virtual column with same expression, no need to add another virtual column
      return column.getOutputName();
    }
  }
  Set<String> usedColumnNames = virtualColumns.stream().map(col -> col.getOutputName()).collect(Collectors.toSet());
  final String name = SqlValidatorUtil.uniquify("vc", usedColumnNames, SqlValidatorUtil.EXPR_SUGGESTER);
  ExpressionVirtualColumn expressionVirtualColumn =
      new ExpressionVirtualColumn(name, virtualColumnExpr, targetType, ExprMacroTable.nil());
  virtualColumns.add(expressionVirtualColumn);
  return name;
}
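A minimal usage sketch (not part of the Hive sources) showing the straightforward path above: a plain column reference resolves directly and no virtual column is registered. The column name "price" and table alias "t" are made up for illustration, and the example assumes double is among the Druid-supported type infos.

  List<VirtualColumn> virtualColumns = new ArrayList<>();
  // Hypothetical column reference; "price" and the alias "t" are illustrative names only.
  ExprNodeDesc columnRef = new ExprNodeColumnDesc(TypeInfoFactory.doubleTypeInfo, "price", "t", false);
  String druidColumn = DruidStorageHandlerUtils.extractColName(columnRef, virtualColumns);
  // druidColumn is "price" and virtualColumns stays empty, because no cast expression is involved.
  // A cast such as CAST(price AS FLOAT) would instead register an ExpressionVirtualColumn and return its output name.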
Use of org.apache.druid.segment.VirtualColumn in project druid by druid-io.
From the class ScanQueryEngine, the method process:
public Sequence<ScanResultValue> process(final ScanQuery query, final Segment segment, final ResponseContext responseContext) {
  // "legacy" should be non-null due to toolChest.mergeResults
  final boolean legacy = Preconditions.checkNotNull(query.isLegacy(), "Expected non-null 'legacy' parameter");
  final Long numScannedRows = responseContext.getRowScanCount();
  if (numScannedRows != null && numScannedRows >= query.getScanRowsLimit() && query.getTimeOrder().equals(ScanQuery.Order.NONE)) {
    return Sequences.empty();
  }
  final boolean hasTimeout = QueryContexts.hasTimeout(query);
  final Long timeoutAt = responseContext.getTimeoutTime();
  final long start = System.currentTimeMillis();
  final StorageAdapter adapter = segment.asStorageAdapter();
  if (adapter == null) {
    throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
  }
  final List<String> allColumns = new ArrayList<>();
  if (query.getColumns() != null && !query.getColumns().isEmpty()) {
    if (legacy && !query.getColumns().contains(LEGACY_TIMESTAMP_KEY)) {
      allColumns.add(LEGACY_TIMESTAMP_KEY);
    }
    // Unless we're in legacy mode, allColumns equals query.getColumns() exactly. This is nice since it makes
    // the compactedList form easier to use.
    allColumns.addAll(query.getColumns());
  } else {
    final Set<String> availableColumns = Sets.newLinkedHashSet(
        Iterables.concat(
            Collections.singleton(legacy ? LEGACY_TIMESTAMP_KEY : ColumnHolder.TIME_COLUMN_NAME),
            Iterables.transform(
                Arrays.asList(query.getVirtualColumns().getVirtualColumns()),
                VirtualColumn::getOutputName
            ),
            adapter.getAvailableDimensions(),
            adapter.getAvailableMetrics()
        )
    );
    allColumns.addAll(availableColumns);
    if (legacy) {
      allColumns.remove(ColumnHolder.TIME_COLUMN_NAME);
    }
  }
  final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
  Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
  final SegmentId segmentId = segment.getId();
  final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter()));
  // If the row count is not set, set it to 0, else do nothing.
  responseContext.addRowScanCount(0);
  final long limit = calculateRemainingScanRowsLimit(query, responseContext);
  return Sequences.concat(
      adapter.makeCursors(
          filter,
          intervals.get(0),
          query.getVirtualColumns(),
          Granularities.ALL,
          query.getTimeOrder().equals(ScanQuery.Order.DESCENDING)
              || (query.getTimeOrder().equals(ScanQuery.Order.NONE) && query.isDescending()),
          null
      ).map(cursor -> new BaseSequence<>(new BaseSequence.IteratorMaker<ScanResultValue, Iterator<ScanResultValue>>() {

        @Override
        public Iterator<ScanResultValue> make() {
          final List<BaseObjectColumnValueSelector> columnSelectors = new ArrayList<>(allColumns.size());
          for (String column : allColumns) {
            final BaseObjectColumnValueSelector selector;
            if (legacy && LEGACY_TIMESTAMP_KEY.equals(column)) {
              selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME);
            } else {
              selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(column);
            }
            columnSelectors.add(selector);
          }
          final int batchSize = query.getBatchSize();
          return new Iterator<ScanResultValue>() {

            private long offset = 0;

            @Override
            public boolean hasNext() {
              return !cursor.isDone() && offset < limit;
            }

            @Override
            public ScanResultValue next() {
              if (!hasNext()) {
                throw new NoSuchElementException();
              }
              if (hasTimeout && System.currentTimeMillis() >= timeoutAt) {
                throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query [%s] timed out", query.getId()));
              }
              final long lastOffset = offset;
              final Object events;
              final ScanQuery.ResultFormat resultFormat = query.getResultFormat();
              if (ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
                events = rowsToCompactedList();
              } else if (ScanQuery.ResultFormat.RESULT_FORMAT_LIST.equals(resultFormat)) {
                events = rowsToList();
              } else {
                throw new UOE("resultFormat[%s] is not supported", resultFormat.toString());
              }
              responseContext.addRowScanCount(offset - lastOffset);
              if (hasTimeout) {
                responseContext.putTimeoutTime(timeoutAt - (System.currentTimeMillis() - start));
              }
              return new ScanResultValue(segmentId.toString(), allColumns, events);
            }

            @Override
            public void remove() {
              throw new UnsupportedOperationException();
            }

            private List<List<Object>> rowsToCompactedList() {
              final List<List<Object>> events = new ArrayList<>(batchSize);
              final long iterLimit = Math.min(limit, offset + batchSize);
              for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                final List<Object> theEvent = new ArrayList<>(allColumns.size());
                for (int j = 0; j < allColumns.size(); j++) {
                  theEvent.add(getColumnValue(j));
                }
                events.add(theEvent);
              }
              return events;
            }

            private List<Map<String, Object>> rowsToList() {
              List<Map<String, Object>> events = Lists.newArrayListWithCapacity(batchSize);
              final long iterLimit = Math.min(limit, offset + batchSize);
              for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                final Map<String, Object> theEvent = new LinkedHashMap<>();
                for (int j = 0; j < allColumns.size(); j++) {
                  theEvent.put(allColumns.get(j), getColumnValue(j));
                }
                events.add(theEvent);
              }
              return events;
            }

            private Object getColumnValue(int i) {
              final BaseObjectColumnValueSelector selector = columnSelectors.get(i);
              final Object value;
              if (legacy && allColumns.get(i).equals(LEGACY_TIMESTAMP_KEY)) {
                value = DateTimes.utc((long) selector.getObject());
              } else {
                value = selector == null ? null : selector.getObject();
              }
              return value;
            }
          };
        }

        @Override
        public void cleanup(Iterator<ScanResultValue> iterFromMake) {
        }
      }))
  );
}
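The helper calculateRemainingScanRowsLimit is referenced above but not included in the snippet. A plausible sketch, consistent with how the limit and the accumulated row-scan count are used in this method (the real implementation may differ):

  private long calculateRemainingScanRowsLimit(ScanQuery query, ResponseContext responseContext) {
    if (query.getTimeOrder().equals(ScanQuery.Order.NONE)) {
      // Unordered scans share one overall row limit across segments, so subtract what has already been scanned.
      return query.getScanRowsLimit() - (Long) responseContext.getRowScanCount();
    }
    return query.getScanRowsLimit();
  }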
Use of org.apache.druid.segment.VirtualColumn in project druid by druid-io.
From the class HashJoinSegmentStorageAdapterTest, the method test_determineBaseColumnsWithPreAndPostJoinVirtualColumns:
@Test
public void test_determineBaseColumnsWithPreAndPostJoinVirtualColumns() {
  List<JoinableClause> joinableClauses = ImmutableList.of(factToCountryOnIsoCode(JoinType.LEFT));
  JoinFilterPreAnalysis analysis = makeDefaultConfigPreAnalysis(null, joinableClauses, VirtualColumns.EMPTY);
  HashJoinSegmentStorageAdapter adapter =
      new HashJoinSegmentStorageAdapter(factSegment.asStorageAdapter(), joinableClauses, analysis);
  List<VirtualColumn> expectedPreJoin = ImmutableList.of(
      makeExpressionVirtualColumn("concat(countryIsoCode,'L')", "v0"),
      makeExpressionVirtualColumn("concat(countryIsoCode, countryNumber)", "v1"),
      makeExpressionVirtualColumn("channel_uniques - 1", "v2"),
      makeExpressionVirtualColumn("channel_uniques - __time", "v3")
  );
  List<VirtualColumn> expectedPostJoin = ImmutableList.of(
      makeExpressionVirtualColumn("concat(countryIsoCode, dummyColumn)", "v4"),
      makeExpressionVirtualColumn("dummyMetric - __time", "v5")
  );
  List<VirtualColumn> actualPreJoin = new ArrayList<>();
  List<VirtualColumn> actualPostJoin = new ArrayList<>();
  List<VirtualColumn> allVirtualColumns = new ArrayList<>();
  allVirtualColumns.addAll(expectedPreJoin);
  allVirtualColumns.addAll(expectedPostJoin);
  adapter.determineBaseColumnsWithPreAndPostJoinVirtualColumns(
      VirtualColumns.create(allVirtualColumns),
      actualPreJoin,
      actualPostJoin
  );
  Assert.assertEquals(expectedPreJoin, actualPreJoin);
  Assert.assertEquals(expectedPostJoin, actualPostJoin);
}
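makeExpressionVirtualColumn comes from the test's base class and is not shown here. A sketch consistent with how it is called above; the output value type and the macro table are assumptions:

  private static VirtualColumn makeExpressionVirtualColumn(String expression, String outputName) {
    // Assumed STRING output type and test macro table; the real helper may differ.
    return new ExpressionVirtualColumn(outputName, expression, ValueType.STRING, TestExprMacroTable.INSTANCE);
  }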
Use of org.apache.druid.segment.VirtualColumn in project druid by druid-io.
From the class HashJoinSegmentStorageAdapter, the method makeCursors:
@Override
public Sequence<Cursor> makeCursors(
    @Nullable final Filter filter,
    @Nonnull final Interval interval,
    @Nonnull final VirtualColumns virtualColumns,
    @Nonnull final Granularity gran,
    final boolean descending,
    @Nullable final QueryMetrics<?> queryMetrics
) {
  final Filter combinedFilter = baseFilterAnd(filter);
  if (clauses.isEmpty()) {
    return baseAdapter.makeCursors(combinedFilter, interval, virtualColumns, gran, descending, queryMetrics);
  }
  // Filter pre-analysis key implied by the call to "makeCursors". We need to sanity-check that it matches
  // the actual pre-analysis that was done. Note: we can't infer a rewrite config from the "makeCursors" call (it
  // requires access to the query context), so we skip sanity-checking that part by reusing the one present
  // in the cached key.
  final JoinFilterPreAnalysisKey keyIn =
      new JoinFilterPreAnalysisKey(joinFilterPreAnalysis.getKey().getRewriteConfig(), clauses, virtualColumns, combinedFilter);
  final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey();
  if (!keyIn.equals(keyCached)) {
    // It is a bug if this happens. The implied key and the cached key should always match.
    throw new ISE("Pre-analysis mismatch, cannot execute query");
  }
  final List<VirtualColumn> preJoinVirtualColumns = new ArrayList<>();
  final List<VirtualColumn> postJoinVirtualColumns = new ArrayList<>();
  determineBaseColumnsWithPreAndPostJoinVirtualColumns(virtualColumns, preJoinVirtualColumns, postJoinVirtualColumns);
  // We merge the filter on the base table specified by the user with the filter on the base table that is
  // pushed down from the join.
  JoinFilterSplit joinFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis, baseFilter);
  preJoinVirtualColumns.addAll(joinFilterSplit.getPushDownVirtualColumns());
  final Sequence<Cursor> baseCursorSequence = baseAdapter.makeCursors(
      joinFilterSplit.getBaseTableFilter().isPresent() ? joinFilterSplit.getBaseTableFilter().get() : null,
      interval,
      VirtualColumns.create(preJoinVirtualColumns),
      gran,
      descending,
      queryMetrics
  );
  Closer joinablesCloser = Closer.create();
  return Sequences.<Cursor, Cursor>map(baseCursorSequence, cursor -> {
    assert cursor != null;
    Cursor retVal = cursor;
    for (JoinableClause clause : clauses) {
      retVal = HashJoinEngine.makeJoinCursor(retVal, clause, descending, joinablesCloser);
    }
    return PostJoinCursor.wrap(retVal, VirtualColumns.create(postJoinVirtualColumns), joinFilterSplit.getJoinTableFilter().orElse(null));
  }).withBaggage(joinablesCloser);
}
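baseFilterAnd, called at the top of the method, is a small private helper that is not part of the snippet. A plausible sketch that ANDs the adapter's own baseFilter with the caller-supplied filter, treating nulls as absent (the actual helper may differ):

  @Nullable
  private Filter baseFilterAnd(@Nullable final Filter other) {
    // Combine the two filters when both are present; a null result means "no filter".
    return Filters.maybeAnd(Arrays.asList(baseFilter, other)).orElse(null);
  }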
Use of org.apache.druid.segment.VirtualColumn in project druid by druid-io.
From the class JoinFilterAnalyzer, the method computeJoinFilterPreAnalysis:
/**
* Before making per-segment filter splitting decisions, we first do a pre-analysis step
* where we convert the query filter (if any) into conjunctive normal form and then
* determine the structure of RHS filter rewrites (if any), since this information is shared across all
* per-segment operations.
*
* See {@link JoinFilterPreAnalysis} for details on the result of this pre-analysis step.
*
* @param key All the information needed to pre-analyze a filter
*
* @return A JoinFilterPreAnalysis containing information determined in this pre-analysis step.
*/
public static JoinFilterPreAnalysis computeJoinFilterPreAnalysis(final JoinFilterPreAnalysisKey key) {
  final List<VirtualColumn> preJoinVirtualColumns = new ArrayList<>();
  final List<VirtualColumn> postJoinVirtualColumns = new ArrayList<>();
  final JoinableClauses joinableClauses = JoinableClauses.fromList(key.getJoinableClauses());
  joinableClauses.splitVirtualColumns(key.getVirtualColumns(), preJoinVirtualColumns, postJoinVirtualColumns);
  final JoinFilterPreAnalysis.Builder preAnalysisBuilder = new JoinFilterPreAnalysis.Builder(key, postJoinVirtualColumns);
  if (key.getFilter() == null || !key.getRewriteConfig().isEnableFilterPushDown()) {
    return preAnalysisBuilder.build();
  }
  List<Filter> normalizedOrClauses = Filters.toNormalizedOrClauses(key.getFilter());
  List<Filter> normalizedBaseTableClauses = new ArrayList<>();
  List<Filter> normalizedJoinTableClauses = new ArrayList<>();
  for (Filter orClause : normalizedOrClauses) {
    Set<String> reqColumns = orClause.getRequiredColumns();
    if (joinableClauses.areSomeColumnsFromJoin(reqColumns)
        || areSomeColumnsFromPostJoinVirtualColumns(postJoinVirtualColumns, reqColumns)) {
      normalizedJoinTableClauses.add(orClause);
    } else {
      normalizedBaseTableClauses.add(orClause);
    }
  }
  preAnalysisBuilder
      .withNormalizedBaseTableClauses(normalizedBaseTableClauses)
      .withNormalizedJoinTableClauses(normalizedJoinTableClauses);
  if (!key.getRewriteConfig().isEnableFilterRewrite()) {
    return preAnalysisBuilder.build();
  }
  // build the equicondition map, used for determining how the tables are connected through joins
  Equiconditions equiconditions = preAnalysisBuilder.computeEquiconditionsFromJoinableClauses();
  JoinFilterCorrelations correlations = JoinFilterCorrelations.computeJoinFilterCorrelations(
      normalizedJoinTableClauses,
      equiconditions,
      joinableClauses,
      key.getRewriteConfig().isEnableRewriteValueColumnFilters(),
      key.getRewriteConfig().getFilterRewriteMaxSize()
  );
  return preAnalysisBuilder.withCorrelations(correlations).build();
}
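A short sketch of how this pre-analysis is consumed; rewriteConfig, joinableClauses, virtualColumns, queryFilter, and baseFilter are placeholders rather than values from the snippets above. The key is computed once per query, and each per-segment call (see HashJoinSegmentStorageAdapter.makeCursors earlier) verifies its implied key against this cached one before splitting the filter:

  // Placeholders come from the query planning code, not from the snippets above.
  JoinFilterPreAnalysisKey key =
      new JoinFilterPreAnalysisKey(rewriteConfig, joinableClauses, virtualColumns, queryFilter);
  JoinFilterPreAnalysis preAnalysis = JoinFilterAnalyzer.computeJoinFilterPreAnalysis(key);
  // Per segment, the base-table portion of the filter can then be split out and pushed down:
  JoinFilterSplit split = JoinFilterAnalyzer.splitFilter(preAnalysis, baseFilter);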