
Example 11 with VirtualColumn

use of org.apache.druid.segment.VirtualColumn in project hive by apache.

the class DruidStorageHandlerUtils method extractColName.

@Nullable
public static String extractColName(ExprNodeDesc expr, List<VirtualColumn> virtualColumns) {
    if (!druidSupportedTypeInfos.contains(expr.getTypeInfo())) {
        // We cannot pass the bloom filter to druid since bloom filter tests for exact object bytes.
        return null;
    }
    if (expr instanceof ExprNodeColumnDesc) {
        return ((ExprNodeColumnDesc) expr).getColumn();
    }
    ExprNodeGenericFuncDesc funcDesc = null;
    if (expr instanceof ExprNodeGenericFuncDesc) {
        funcDesc = (ExprNodeGenericFuncDesc) expr;
    }
    if (null == funcDesc) {
        return null;
    }
    GenericUDF udf = funcDesc.getGenericUDF();
    // Bail out if it's not a simple cast expression over a single ExprNodeColumnDesc child;
    // the cast on the next line relies on exactly that shape.
    if (funcDesc.getChildren().size() != 1 || !(funcDesc.getChildren().get(0) instanceof ExprNodeColumnDesc)) {
        return null;
    }
    String columnName = ((ExprNodeColumnDesc) (funcDesc.getChildren().get(0))).getColumn();
    ValueType targetType = null;
    if (udf instanceof GenericUDFBridge) {
        Class<? extends UDF> udfClass = ((GenericUDFBridge) udf).getUdfClass();
        if (udfClass.equals(UDFToDouble.class)) {
            targetType = ValueType.DOUBLE;
        } else if (udfClass.equals(UDFToFloat.class)) {
            targetType = ValueType.FLOAT;
        } else if (udfClass.equals(UDFToLong.class)) {
            targetType = ValueType.LONG;
        }
    } else if (udf instanceof GenericUDFToString) {
        targetType = ValueType.STRING;
    }
    if (targetType == null) {
        return null;
    }
    String virtualColumnExpr = DruidQuery.format("CAST(%s, '%s')", columnName, targetType.toString());
    for (VirtualColumn column : virtualColumns) {
        if (column instanceof ExpressionVirtualColumn && ((ExpressionVirtualColumn) column).getExpression().equals(virtualColumnExpr)) {
            // Found an existing virtual column with same expression, no need to add another virtual column
            return column.getOutputName();
        }
    }
    Set<String> usedColumnNames = virtualColumns.stream().map(col -> col.getOutputName()).collect(Collectors.toSet());
    final String name = SqlValidatorUtil.uniquify("vc", usedColumnNames, SqlValidatorUtil.EXPR_SUGGESTER);
    ExpressionVirtualColumn expressionVirtualColumn = new ExpressionVirtualColumn(name, virtualColumnExpr, targetType, ExprMacroTable.nil());
    virtualColumns.add(expressionVirtualColumn);
    return name;
}
Also used : Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) HttpMethod(org.jboss.netty.handler.codec.http.HttpMethod) IndexSpec(org.apache.druid.segment.IndexSpec) FileStatus(org.apache.hadoop.fs.FileStatus) FunctionRegistry(org.apache.hadoop.hive.ql.exec.FunctionRegistry) StringFullResponseHandler(org.apache.druid.java.util.http.client.response.StringFullResponseHandler) HdfsDataSegmentPusherConfig(org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig) Pair(org.apache.druid.java.util.common.Pair) InetAddress(java.net.InetAddress) SqlValidatorUtil(org.apache.calcite.sql.validate.SqlValidatorUtil) JodaUtils(org.apache.druid.java.util.common.JodaUtils) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) ExprNodeEvaluatorFactory(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory) TimestampParseExprMacro(org.apache.druid.query.expression.TimestampParseExprMacro) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) InputStreamResponseHandler(org.apache.druid.java.util.http.client.response.InputStreamResponseHandler) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Set(java.util.Set) ValueType(org.apache.druid.segment.column.ValueType) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) DimFilter(org.apache.druid.query.filter.DimFilter) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) ISOChronology(org.joda.time.chrono.ISOChronology) BloomFilterSerializersModule(org.apache.druid.guice.BloomFilterSerializersModule) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) Granularity(org.apache.druid.java.util.common.granularity.Granularity) HttpClient(org.apache.druid.java.util.http.client.HttpClient) UDFToLong(org.apache.hadoop.hive.ql.udf.UDFToLong) ScanQuery(org.apache.druid.query.scan.ScanQuery) TopNQuery(org.apache.druid.query.topn.TopNQuery) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) StringComparators(org.apache.druid.query.ordering.StringComparators) ResultIterator(org.skife.jdbi.v2.ResultIterator) DruidQuery(org.apache.calcite.adapter.druid.DruidQuery) Constants(org.apache.hadoop.hive.conf.Constants) Nullable(javax.annotation.Nullable) RegexpExtractExprMacro(org.apache.druid.query.expression.RegexpExtractExprMacro) HttpHeaders(org.jboss.netty.handler.codec.http.HttpHeaders) Properties(java.util.Properties) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) StringComparator(org.apache.druid.query.ordering.StringComparator) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) VirtualColumn(org.apache.druid.segment.VirtualColumn) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) 
UDFToDouble(org.apache.hadoop.hive.ql.udf.UDFToDouble) Throwables(com.google.common.base.Throwables) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) GenericUDFInBloomFilter(org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter) IOException(java.io.IOException) NoneShardSpec(org.apache.druid.timeline.partition.NoneShardSpec) UnknownHostException(java.net.UnknownHostException) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) Table(org.apache.hadoop.hive.metastore.api.Table) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) ExecutionException(java.util.concurrent.ExecutionException) HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) TimestampExtractExprMacro(org.apache.druid.query.expression.TimestampExtractExprMacro) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) RetryProxy(org.apache.hadoop.io.retry.RetryProxy) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) URL(java.net.URL) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) AvroStreamInputRowParser(org.apache.hadoop.hive.druid.json.AvroStreamInputRowParser) LoggerFactory(org.slf4j.LoggerFactory) RetryPolicies(org.apache.hadoop.io.retry.RetryPolicies) TmpFileSegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory) ByteBuffer(java.nio.ByteBuffer) UDF(org.apache.hadoop.hive.ql.exec.UDF) Druids(org.apache.druid.query.Druids) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) TimestampFormatExprMacro(org.apache.druid.query.expression.TimestampFormatExprMacro) LikeExprMacro(org.apache.druid.query.expression.LikeExprMacro) Path(org.apache.hadoop.fs.Path) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) ByteArrayMapper(org.skife.jdbi.v2.util.ByteArrayMapper) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) ImmutableSet(com.google.common.collect.ImmutableSet) MetadataStorageTablesConfig(org.apache.druid.metadata.MetadataStorageTablesConfig) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TimestampCeilExprMacro(org.apache.druid.query.expression.TimestampCeilExprMacro) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) Interner(com.google.common.collect.Interner) BloomKFilterHolder(org.apache.druid.query.filter.BloomKFilterHolder) Collectors(java.util.stream.Collectors) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) Objects(java.util.Objects) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MapUtils(org.apache.druid.java.util.common.MapUtils) ExprNodeDynamicValueEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeDynamicValueEvaluator) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) TimestampShiftExprMacro(org.apache.druid.query.expression.TimestampShiftExprMacro) UDFToFloat(org.apache.hadoop.hive.ql.udf.UDFToFloat) 
AvroParseSpec(org.apache.hadoop.hive.druid.json.AvroParseSpec) ImmutableList(com.google.common.collect.ImmutableList) NoopEmitter(org.apache.druid.java.util.emitter.core.NoopEmitter) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Request(org.apache.druid.java.util.http.client.Request) PrimitiveObjectInspectorUtils(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) BytesWritable(org.apache.hadoop.io.BytesWritable) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) IndexMergerV9(org.apache.druid.segment.IndexMergerV9) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) OutputStream(java.io.OutputStream) SQLMetadataConnector(org.apache.druid.metadata.SQLMetadataConnector) Period(org.joda.time.Period) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VirtualColumns(org.apache.druid.segment.VirtualColumns) Logger(org.slf4j.Logger) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) TimestampFloorExprMacro(org.apache.druid.query.expression.TimestampFloorExprMacro) TypeInfoFactory(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory) Iterator(java.util.Iterator) Folder3(org.skife.jdbi.v2.Folder3) MalformedURLException(java.net.MalformedURLException) MySQLConnector(org.apache.druid.metadata.storage.mysql.MySQLConnector) TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) HiveConf(org.apache.hadoop.hive.conf.HiveConf) DateTime(org.joda.time.DateTime) Interners(com.google.common.collect.Interners) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) Query(org.skife.jdbi.v2.Query) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) TimeUnit(java.util.concurrent.TimeUnit) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory) Handle(org.skife.jdbi.v2.Handle) Ordering(com.google.common.collect.Ordering) NullHandling(org.apache.druid.common.config.NullHandling) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) BloomDimFilter(org.apache.druid.query.filter.BloomDimFilter) TrimExprMacro(org.apache.druid.query.expression.TrimExprMacro) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) FloatSumAggregatorFactory(org.apache.druid.query.aggregation.FloatSumAggregatorFactory) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) InputStream(java.io.InputStream) ValueType(org.apache.druid.segment.column.ValueType) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) UDFToFloat(org.apache.hadoop.hive.ql.udf.UDFToFloat) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) 
ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VirtualColumn(org.apache.druid.segment.VirtualColumn) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) Nullable(javax.annotation.Nullable)
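
For context, a minimal usage sketch of extractColName follows. It is hypothetical (not taken from the project's tests): the column name "metric_col", table alias "t", and type choices are illustrative, and it assumes Hive's public ExprNodeColumnDesc, ExprNodeGenericFuncDesc, and GenericUDFBridge constructors.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.druid.segment.VirtualColumn;
import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.UDFToDouble;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ExtractColNameSketch {

    public static void main(String[] args) {
        List<VirtualColumn> virtualColumns = new ArrayList<>();
        // A plain column reference of a Druid-supported primitive type ("metric_col" is illustrative).
        ExprNodeColumnDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "metric_col", "t", false);
        // Wrap it in a bridge UDF so it is a simple cast expression: CAST(metric_col AS DOUBLE).
        ExprNodeGenericFuncDesc cast = new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo, new GenericUDFBridge("double", false, UDFToDouble.class.getName()), Collections.singletonList(column));
        // Expected to register an ExpressionVirtualColumn with expression CAST(metric_col, 'DOUBLE')
        // and return its generated output name, e.g. "vc".
        String outputName = DruidStorageHandlerUtils.extractColName(cast, virtualColumns);
        System.out.println(outputName + " -> " + virtualColumns);
    }
}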

Example 12 with VirtualColumn

use of org.apache.druid.segment.VirtualColumn in project druid by druid-io.

the class ScanQueryEngine method process.

public Sequence<ScanResultValue> process(final ScanQuery query, final Segment segment, final ResponseContext responseContext) {
    // "legacy" should be non-null due to toolChest.mergeResults
    final boolean legacy = Preconditions.checkNotNull(query.isLegacy(), "Expected non-null 'legacy' parameter");
    final Long numScannedRows = responseContext.getRowScanCount();
    if (numScannedRows != null && numScannedRows >= query.getScanRowsLimit() && query.getTimeOrder().equals(ScanQuery.Order.NONE)) {
        return Sequences.empty();
    }
    final boolean hasTimeout = QueryContexts.hasTimeout(query);
    final Long timeoutAt = responseContext.getTimeoutTime();
    final long start = System.currentTimeMillis();
    final StorageAdapter adapter = segment.asStorageAdapter();
    if (adapter == null) {
        throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
    }
    final List<String> allColumns = new ArrayList<>();
    if (query.getColumns() != null && !query.getColumns().isEmpty()) {
        if (legacy && !query.getColumns().contains(LEGACY_TIMESTAMP_KEY)) {
            allColumns.add(LEGACY_TIMESTAMP_KEY);
        }
        // Unless we're in legacy mode, allColumns equals query.getColumns() exactly. This is nice since it makes
        // the compactedList form easier to use.
        allColumns.addAll(query.getColumns());
    } else {
        final Set<String> availableColumns = Sets.newLinkedHashSet(Iterables.concat(Collections.singleton(legacy ? LEGACY_TIMESTAMP_KEY : ColumnHolder.TIME_COLUMN_NAME), Iterables.transform(Arrays.asList(query.getVirtualColumns().getVirtualColumns()), VirtualColumn::getOutputName), adapter.getAvailableDimensions(), adapter.getAvailableMetrics()));
        allColumns.addAll(availableColumns);
        if (legacy) {
            allColumns.remove(ColumnHolder.TIME_COLUMN_NAME);
        }
    }
    final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
    Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
    final SegmentId segmentId = segment.getId();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter()));
    // If the row count is not set, set it to 0, else do nothing.
    responseContext.addRowScanCount(0);
    final long limit = calculateRemainingScanRowsLimit(query, responseContext);
    return Sequences.concat(adapter.makeCursors(filter, intervals.get(0), query.getVirtualColumns(), Granularities.ALL, query.getTimeOrder().equals(ScanQuery.Order.DESCENDING) || (query.getTimeOrder().equals(ScanQuery.Order.NONE) && query.isDescending()), null).map(cursor -> new BaseSequence<>(new BaseSequence.IteratorMaker<ScanResultValue, Iterator<ScanResultValue>>() {

        @Override
        public Iterator<ScanResultValue> make() {
            final List<BaseObjectColumnValueSelector> columnSelectors = new ArrayList<>(allColumns.size());
            for (String column : allColumns) {
                final BaseObjectColumnValueSelector selector;
                if (legacy && LEGACY_TIMESTAMP_KEY.equals(column)) {
                    selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME);
                } else {
                    selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(column);
                }
                columnSelectors.add(selector);
            }
            final int batchSize = query.getBatchSize();
            return new Iterator<ScanResultValue>() {

                private long offset = 0;

                @Override
                public boolean hasNext() {
                    return !cursor.isDone() && offset < limit;
                }

                @Override
                public ScanResultValue next() {
                    if (!hasNext()) {
                        throw new NoSuchElementException();
                    }
                    if (hasTimeout && System.currentTimeMillis() >= timeoutAt) {
                        throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query [%s] timed out", query.getId()));
                    }
                    final long lastOffset = offset;
                    final Object events;
                    final ScanQuery.ResultFormat resultFormat = query.getResultFormat();
                    if (ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
                        events = rowsToCompactedList();
                    } else if (ScanQuery.ResultFormat.RESULT_FORMAT_LIST.equals(resultFormat)) {
                        events = rowsToList();
                    } else {
                        throw new UOE("resultFormat[%s] is not supported", resultFormat.toString());
                    }
                    responseContext.addRowScanCount(offset - lastOffset);
                    if (hasTimeout) {
                        responseContext.putTimeoutTime(timeoutAt - (System.currentTimeMillis() - start));
                    }
                    return new ScanResultValue(segmentId.toString(), allColumns, events);
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }

                private List<List<Object>> rowsToCompactedList() {
                    final List<List<Object>> events = new ArrayList<>(batchSize);
                    final long iterLimit = Math.min(limit, offset + batchSize);
                    for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final List<Object> theEvent = new ArrayList<>(allColumns.size());
                        for (int j = 0; j < allColumns.size(); j++) {
                            theEvent.add(getColumnValue(j));
                        }
                        events.add(theEvent);
                    }
                    return events;
                }

                private List<Map<String, Object>> rowsToList() {
                    List<Map<String, Object>> events = Lists.newArrayListWithCapacity(batchSize);
                    final long iterLimit = Math.min(limit, offset + batchSize);
                    for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final Map<String, Object> theEvent = new LinkedHashMap<>();
                        for (int j = 0; j < allColumns.size(); j++) {
                            theEvent.put(allColumns.get(j), getColumnValue(j));
                        }
                        events.add(theEvent);
                    }
                    return events;
                }

                private Object getColumnValue(int i) {
                    final BaseObjectColumnValueSelector selector = columnSelectors.get(i);
                    final Object value;
                    if (legacy && allColumns.get(i).equals(LEGACY_TIMESTAMP_KEY)) {
                        value = DateTimes.utc((long) selector.getObject());
                    } else {
                        value = selector == null ? null : selector.getObject();
                    }
                    return value;
                }
            };
        }

        @Override
        public void cleanup(Iterator<ScanResultValue> iterFromMake) {
        }
    })));
}
Also used : Iterables(com.google.common.collect.Iterables) Arrays(java.util.Arrays) StorageAdapter(org.apache.druid.segment.StorageAdapter) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) Map(java.util.Map) UOE(org.apache.druid.java.util.common.UOE) NoSuchElementException(java.util.NoSuchElementException) BaseObjectColumnValueSelector(org.apache.druid.segment.BaseObjectColumnValueSelector) Sequences(org.apache.druid.java.util.common.guava.Sequences) Segment(org.apache.druid.segment.Segment) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Iterator(java.util.Iterator) ResponseContext(org.apache.druid.query.context.ResponseContext) VirtualColumn(org.apache.druid.segment.VirtualColumn) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Sets(com.google.common.collect.Sets) QueryContexts(org.apache.druid.query.QueryContexts) Granularities(org.apache.druid.java.util.common.granularity.Granularities) List(java.util.List) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) Preconditions(com.google.common.base.Preconditions) BaseSequence(org.apache.druid.java.util.common.guava.BaseSequence) SegmentId(org.apache.druid.timeline.SegmentId) Filters(org.apache.druid.segment.filter.Filters) Collections(java.util.Collections) Filter(org.apache.druid.query.filter.Filter) ArrayList(java.util.ArrayList) StorageAdapter(org.apache.druid.segment.StorageAdapter) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) Iterator(java.util.Iterator) ISE(org.apache.druid.java.util.common.ISE) ArrayList(java.util.ArrayList) List(java.util.List) SegmentId(org.apache.druid.timeline.SegmentId) BaseObjectColumnValueSelector(org.apache.druid.segment.BaseObjectColumnValueSelector) UOE(org.apache.druid.java.util.common.UOE) BaseSequence(org.apache.druid.java.util.common.guava.BaseSequence) Filter(org.apache.druid.query.filter.Filter) VirtualColumn(org.apache.druid.segment.VirtualColumn) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) NoSuchElementException(java.util.NoSuchElementException) Interval(org.joda.time.Interval)
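
The limit used above comes from calculateRemainingScanRowsLimit, which is not shown in this excerpt. A plausible sketch, consistent with the early-exit check at the top of process (the accumulated row count only matters when the time order is NONE), would be:

// Sketch (assumed, not copied from the source): when rows are not time-ordered the scan
// limit is shared across segments, so rows already counted in the ResponseContext are
// subtracted; otherwise the query's own limit is used unchanged. Relies on the counter
// having been initialized via addRowScanCount(0) above.
private long calculateRemainingScanRowsLimit(ScanQuery query, ResponseContext responseContext) {
    if (query.getTimeOrder().equals(ScanQuery.Order.NONE)) {
        return query.getScanRowsLimit() - responseContext.getRowScanCount();
    }
    return query.getScanRowsLimit();
}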

Example 13 with VirtualColumn

use of org.apache.druid.segment.VirtualColumn in project druid by druid-io.

the class HashJoinSegmentStorageAdapterTest method test_determineBaseColumnsWithPreAndPostJoinVirtualColumns.

@Test
public void test_determineBaseColumnsWithPreAndPostJoinVirtualColumns() {
    List<JoinableClause> joinableClauses = ImmutableList.of(factToCountryOnIsoCode(JoinType.LEFT));
    JoinFilterPreAnalysis analysis = makeDefaultConfigPreAnalysis(null, joinableClauses, VirtualColumns.EMPTY);
    HashJoinSegmentStorageAdapter adapter = new HashJoinSegmentStorageAdapter(factSegment.asStorageAdapter(), joinableClauses, analysis);
    List<VirtualColumn> expectedPreJoin = ImmutableList.of(makeExpressionVirtualColumn("concat(countryIsoCode,'L')", "v0"), makeExpressionVirtualColumn("concat(countryIsoCode, countryNumber)", "v1"), makeExpressionVirtualColumn("channel_uniques - 1", "v2"), makeExpressionVirtualColumn("channel_uniques - __time", "v3"));
    List<VirtualColumn> expectedPostJoin = ImmutableList.of(makeExpressionVirtualColumn("concat(countryIsoCode, dummyColumn)", "v4"), makeExpressionVirtualColumn("dummyMetric - __time", "v5"));
    List<VirtualColumn> actualPreJoin = new ArrayList<>();
    List<VirtualColumn> actualPostJoin = new ArrayList<>();
    List<VirtualColumn> allVirtualColumns = new ArrayList<>();
    allVirtualColumns.addAll(expectedPreJoin);
    allVirtualColumns.addAll(expectedPostJoin);
    adapter.determineBaseColumnsWithPreAndPostJoinVirtualColumns(VirtualColumns.create(allVirtualColumns), actualPreJoin, actualPostJoin);
    Assert.assertEquals(expectedPreJoin, actualPreJoin);
    Assert.assertEquals(expectedPostJoin, actualPostJoin);
}
Also used : JoinFilterPreAnalysis(org.apache.druid.segment.join.filter.JoinFilterPreAnalysis) ArrayList(java.util.ArrayList) VirtualColumn(org.apache.druid.segment.VirtualColumn) Test(org.junit.Test)
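
The makeExpressionVirtualColumn calls come from a helper in the test's base class; a plausible sketch (the exact output type and macro table arguments depend on the Druid version) is:

// Assumed helper shape: wrap an expression string in an ExpressionVirtualColumn with the
// given output name, so tests can build virtual columns concisely.
private static ExpressionVirtualColumn makeExpressionVirtualColumn(String expression, String outputName) {
    return new ExpressionVirtualColumn(outputName, expression, ValueType.STRING, ExprMacroTable.nil());
}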

Example 14 with VirtualColumn

use of org.apache.druid.segment.VirtualColumn in project druid by druid-io.

the class HashJoinSegmentStorageAdapter method makeCursors.

@Override
public Sequence<Cursor> makeCursors(@Nullable final Filter filter, @Nonnull final Interval interval, @Nonnull final VirtualColumns virtualColumns, @Nonnull final Granularity gran, final boolean descending, @Nullable final QueryMetrics<?> queryMetrics) {
    final Filter combinedFilter = baseFilterAnd(filter);
    if (clauses.isEmpty()) {
        return baseAdapter.makeCursors(combinedFilter, interval, virtualColumns, gran, descending, queryMetrics);
    }
    // Filter pre-analysis key implied by the call to "makeCursors". We need to sanity-check that it matches
    // the actual pre-analysis that was done. Note: we can't infer a rewrite config from the "makeCursors" call (it
    // requires access to the query context) so we'll need to skip sanity-checking it, by re-using the one present
    // in the cached key.
    final JoinFilterPreAnalysisKey keyIn = new JoinFilterPreAnalysisKey(joinFilterPreAnalysis.getKey().getRewriteConfig(), clauses, virtualColumns, combinedFilter);
    final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey();
    if (!keyIn.equals(keyCached)) {
        // It is a bug if this happens. The implied key and the cached key should always match.
        throw new ISE("Pre-analysis mismatch, cannot execute query");
    }
    final List<VirtualColumn> preJoinVirtualColumns = new ArrayList<>();
    final List<VirtualColumn> postJoinVirtualColumns = new ArrayList<>();
    determineBaseColumnsWithPreAndPostJoinVirtualColumns(virtualColumns, preJoinVirtualColumns, postJoinVirtualColumns);
    // We merge the filter on base table specified by the user and filter on the base table that is pushed from
    // the join
    JoinFilterSplit joinFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis, baseFilter);
    preJoinVirtualColumns.addAll(joinFilterSplit.getPushDownVirtualColumns());
    final Sequence<Cursor> baseCursorSequence = baseAdapter.makeCursors(joinFilterSplit.getBaseTableFilter().isPresent() ? joinFilterSplit.getBaseTableFilter().get() : null, interval, VirtualColumns.create(preJoinVirtualColumns), gran, descending, queryMetrics);
    Closer joinablesCloser = Closer.create();
    return Sequences.<Cursor, Cursor>map(baseCursorSequence, cursor -> {
        assert cursor != null;
        Cursor retVal = cursor;
        for (JoinableClause clause : clauses) {
            retVal = HashJoinEngine.makeJoinCursor(retVal, clause, descending, joinablesCloser);
        }
        return PostJoinCursor.wrap(retVal, VirtualColumns.create(postJoinVirtualColumns), joinFilterSplit.getJoinTableFilter().orElse(null));
    }).withBaggage(joinablesCloser);
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) Indexed(org.apache.druid.segment.data.Indexed) Arrays(java.util.Arrays) Granularity(org.apache.druid.java.util.common.granularity.Granularity) QueryMetrics(org.apache.druid.query.QueryMetrics) Metadata(org.apache.druid.segment.Metadata) StorageAdapter(org.apache.druid.segment.StorageAdapter) ArrayList(java.util.ArrayList) JoinFilterSplit(org.apache.druid.segment.join.filter.JoinFilterSplit) HashSet(java.util.HashSet) VectorCursor(org.apache.druid.segment.vector.VectorCursor) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ListIndexed(org.apache.druid.segment.data.ListIndexed) JoinFilterPreAnalysisKey(org.apache.druid.segment.join.filter.JoinFilterPreAnalysisKey) Nonnull(javax.annotation.Nonnull) Sequences(org.apache.druid.java.util.common.guava.Sequences) Nullable(javax.annotation.Nullable) LinkedHashSet(java.util.LinkedHashSet) Sequence(org.apache.druid.java.util.common.guava.Sequence) VirtualColumns(org.apache.druid.segment.VirtualColumns) Closer(org.apache.druid.java.util.common.io.Closer) VirtualColumn(org.apache.druid.segment.VirtualColumn) DateTime(org.joda.time.DateTime) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) JoinFilterPreAnalysis(org.apache.druid.segment.join.filter.JoinFilterPreAnalysis) List(java.util.List) Cursor(org.apache.druid.segment.Cursor) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) Optional(java.util.Optional) JoinFilterAnalyzer(org.apache.druid.segment.join.filter.JoinFilterAnalyzer) Filters(org.apache.druid.segment.filter.Filters) Filter(org.apache.druid.query.filter.Filter) Filter(org.apache.druid.query.filter.Filter) JoinFilterPreAnalysisKey(org.apache.druid.segment.join.filter.JoinFilterPreAnalysisKey) JoinFilterSplit(org.apache.druid.segment.join.filter.JoinFilterSplit) ArrayList(java.util.ArrayList) ISE(org.apache.druid.java.util.common.ISE) VirtualColumn(org.apache.druid.segment.VirtualColumn) VectorCursor(org.apache.druid.segment.vector.VectorCursor) Cursor(org.apache.druid.segment.Cursor)
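
The split performed by determineBaseColumnsWithPreAndPostJoinVirtualColumns follows a simple rule: a virtual column can be evaluated before the join only if every column it requires comes from the base table. A conceptual sketch of that rule (baseTableColumnNames is an assumed local, not part of the code above):

// Conceptual sketch of the split, not the actual implementation.
for (VirtualColumn virtualColumn : virtualColumns.getVirtualColumns()) {
    if (baseTableColumnNames.containsAll(virtualColumn.requiredColumns())) {
        // Computable from the base table alone, so it can be evaluated on the base cursor.
        preJoinVirtualColumns.add(virtualColumn);
    } else {
        // Depends on columns produced by the join; must be evaluated on the post-join cursor.
        postJoinVirtualColumns.add(virtualColumn);
    }
}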

Example 15 with VirtualColumn

use of org.apache.druid.segment.VirtualColumn in project druid by druid-io.

the class JoinFilterAnalyzer method computeJoinFilterPreAnalysis.

/**
 * Before making per-segment filter splitting decisions, we first do a pre-analysis step
 * where we convert the query filter (if any) into conjunctive normal form and then
 * determine the structure of RHS filter rewrites (if any), since this information is shared across all
 * per-segment operations.
 *
 * See {@link JoinFilterPreAnalysis} for details on the result of this pre-analysis step.
 *
 * @param key All the information needed to pre-analyze a filter
 *
 * @return A JoinFilterPreAnalysis containing information determined in this pre-analysis step.
 */
public static JoinFilterPreAnalysis computeJoinFilterPreAnalysis(final JoinFilterPreAnalysisKey key) {
    final List<VirtualColumn> preJoinVirtualColumns = new ArrayList<>();
    final List<VirtualColumn> postJoinVirtualColumns = new ArrayList<>();
    final JoinableClauses joinableClauses = JoinableClauses.fromList(key.getJoinableClauses());
    joinableClauses.splitVirtualColumns(key.getVirtualColumns(), preJoinVirtualColumns, postJoinVirtualColumns);
    final JoinFilterPreAnalysis.Builder preAnalysisBuilder = new JoinFilterPreAnalysis.Builder(key, postJoinVirtualColumns);
    if (key.getFilter() == null || !key.getRewriteConfig().isEnableFilterPushDown()) {
        return preAnalysisBuilder.build();
    }
    List<Filter> normalizedOrClauses = Filters.toNormalizedOrClauses(key.getFilter());
    List<Filter> normalizedBaseTableClauses = new ArrayList<>();
    List<Filter> normalizedJoinTableClauses = new ArrayList<>();
    for (Filter orClause : normalizedOrClauses) {
        Set<String> reqColumns = orClause.getRequiredColumns();
        if (joinableClauses.areSomeColumnsFromJoin(reqColumns) || areSomeColumnsFromPostJoinVirtualColumns(postJoinVirtualColumns, reqColumns)) {
            normalizedJoinTableClauses.add(orClause);
        } else {
            normalizedBaseTableClauses.add(orClause);
        }
    }
    preAnalysisBuilder.withNormalizedBaseTableClauses(normalizedBaseTableClauses).withNormalizedJoinTableClauses(normalizedJoinTableClauses);
    if (!key.getRewriteConfig().isEnableFilterRewrite()) {
        return preAnalysisBuilder.build();
    }
    // build the equicondition map, used for determining how the tables are connected through joins
    Equiconditions equiconditions = preAnalysisBuilder.computeEquiconditionsFromJoinableClauses();
    JoinFilterCorrelations correlations = JoinFilterCorrelations.computeJoinFilterCorrelations(normalizedJoinTableClauses, equiconditions, joinableClauses, key.getRewriteConfig().isEnableRewriteValueColumnFilters(), key.getRewriteConfig().getFilterRewriteMaxSize());
    return preAnalysisBuilder.withCorrelations(correlations).build();
}
Also used : ArrayList(java.util.ArrayList) SelectorFilter(org.apache.druid.segment.filter.SelectorFilter) FalseFilter(org.apache.druid.segment.filter.FalseFilter) OrFilter(org.apache.druid.segment.filter.OrFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) Filter(org.apache.druid.query.filter.Filter) VirtualColumn(org.apache.druid.segment.VirtualColumn) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn)
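
The areSomeColumnsFromPostJoinVirtualColumns helper referenced in the loop is not shown; a plausible sketch (assumed, not copied from the source) checks whether any required column is the output of a post-join virtual column, since such columns only exist after the join is applied:

private static boolean areSomeColumnsFromPostJoinVirtualColumns(List<VirtualColumn> postJoinVirtualColumns, Set<String> requiredColumns) {
    for (VirtualColumn virtualColumn : postJoinVirtualColumns) {
        if (requiredColumns.contains(virtualColumn.getOutputName())) {
            return true;
        }
    }
    return false;
}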

Aggregations

VirtualColumn (org.apache.druid.segment.VirtualColumn): 19
ArrayList (java.util.ArrayList): 10
ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn): 9
Filter (org.apache.druid.query.filter.Filter): 7
InDimFilter (org.apache.druid.query.filter.InDimFilter): 5
FalseFilter (org.apache.druid.segment.filter.FalseFilter): 5
OrFilter (org.apache.druid.segment.filter.OrFilter): 5
SelectorFilter (org.apache.druid.segment.filter.SelectorFilter): 5
HashSet (java.util.HashSet): 4
List (java.util.List): 4
Set (java.util.Set): 4
Test (org.junit.Test): 4
ImmutableSet (com.google.common.collect.ImmutableSet): 3
Lists (com.google.common.collect.Lists): 3
Arrays (java.util.Arrays): 3
Nullable (javax.annotation.Nullable): 3
Interval (org.joda.time.Interval): 3
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2
Iterables (com.google.common.collect.Iterables): 2
Collections (java.util.Collections): 2