Search in sources :

Example 21 with VirtualColumns

use of org.apache.druid.segment.VirtualColumns in project druid by druid-io.

the class VirtualColumnsTest method testGetColumnCapabilitiesWithFallbackDoubleBase.

/**
 * Checks the types reported by {@code getColumnCapabilitiesWithFallback} when the base
 * inspector knows only {@code REAL_COLUMN_NAME}, exposed as a DOUBLE numeric column.
 */
@Test
public void testGetColumnCapabilitiesWithFallbackDoubleBase() {
    final VirtualColumns vcs = makeVirtualColumns();
    // The base inspector answers for the real column only; every other name is unknown (null).
    final ColumnInspector doubleOnlyInspector = name -> REAL_COLUMN_NAME.equals(name)
        ? ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.DOUBLE)
        : null;

    // Virtual column "expr" is expected to report FLOAT.
    Assert.assertEquals(
        ValueType.FLOAT,
        vcs.getColumnCapabilitiesWithFallback(doubleOnlyInspector, "expr").getType());
    // Virtual column "expr2" is expected to report DOUBLE.
    Assert.assertEquals(
        ValueType.DOUBLE,
        vcs.getColumnCapabilitiesWithFallback(doubleOnlyInspector, "expr2").getType());
    // The real column is expected to report the DOUBLE type supplied by the base inspector.
    Assert.assertEquals(
        ValueType.DOUBLE,
        vcs.getColumnCapabilitiesWithFallback(doubleOnlyInspector, REAL_COLUMN_NAME).getType());
}
Also used : Arrays(java.util.Arrays) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) ExtractionFn(org.apache.druid.query.extraction.ExtractionFn) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) IdLookup(org.apache.druid.segment.IdLookup) IndexedInts(org.apache.druid.segment.data.IndexedInts) BaseFloatColumnValueSelector(org.apache.druid.segment.BaseFloatColumnValueSelector) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) ImmutableList(com.google.common.collect.ImmutableList) DimensionSelector(org.apache.druid.segment.DimensionSelector) TestLongColumnSelector(org.apache.druid.segment.TestLongColumnSelector) BucketExtractionFn(org.apache.druid.query.extraction.BucketExtractionFn) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) BaseObjectColumnValueSelector(org.apache.druid.segment.BaseObjectColumnValueSelector) ExpectedException(org.junit.rules.ExpectedException) Nullable(javax.annotation.Nullable) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) ColumnInspector(org.apache.druid.segment.ColumnInspector) Longs(com.google.common.primitives.Longs) VirtualColumns(org.apache.druid.segment.VirtualColumns) VirtualColumn(org.apache.druid.segment.VirtualColumn) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ValueType(org.apache.druid.segment.column.ValueType) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) DimensionDictionarySelector(org.apache.druid.segment.DimensionDictionarySelector) DimensionSelectorUtils(org.apache.druid.segment.DimensionSelectorUtils) ZeroIndexedInts(org.apache.druid.segment.data.ZeroIndexedInts) TestHelper(org.apache.druid.segment.TestHelper) List(java.util.List) 
BaseLongColumnValueSelector(org.apache.druid.segment.BaseLongColumnValueSelector) ColumnCapabilitiesImpl(org.apache.druid.segment.column.ColumnCapabilitiesImpl) Rule(org.junit.Rule) Predicate(com.google.common.base.Predicate) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) ColumnType(org.apache.druid.segment.column.ColumnType) Assert(org.junit.Assert) ColumnInspector(org.apache.druid.segment.ColumnInspector) VirtualColumns(org.apache.druid.segment.VirtualColumns) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 22 with VirtualColumns

use of org.apache.druid.segment.VirtualColumns in project hive by apache.

the class DruidStorageHandlerUtils method extractColName.

/**
 * Resolves the column name to use for {@code expr}.
 *
 * <ul>
 *   <li>A plain column reference yields that column's name.</li>
 *   <li>A single-argument cast of a column to double, float, long or string yields the output
 *       name of an {@link ExpressionVirtualColumn} holding the matching {@code CAST(...)}
 *       expression. An existing virtual column with the same expression is reused; otherwise
 *       a new one with a unique name is appended to {@code virtualColumns}.</li>
 *   <li>Anything else yields {@code null}.</li>
 * </ul>
 *
 * @param expr           expression to resolve
 * @param virtualColumns virtual columns collected so far; may be appended to by this call,
 *                       so it must be a modifiable list
 * @return the column (or virtual column) name, or {@code null} when the expression is not
 *         supported
 */
@Nullable
public static String extractColName(ExprNodeDesc expr, List<VirtualColumn> virtualColumns) {
    if (!druidSupportedTypeInfos.contains(expr.getTypeInfo())) {
        // We cannot pass the bloom filter to druid since bloom filter tests for exact object bytes.
        return null;
    }
    if (expr instanceof ExprNodeColumnDesc) {
        return ((ExprNodeColumnDesc) expr).getColumn();
    }
    if (!(expr instanceof ExprNodeGenericFuncDesc)) {
        return null;
    }
    final ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) expr;
    final List<ExprNodeDesc> children = funcDesc.getChildren();
    // Bail out if it's not a simple cast expression, i.e. anything other than exactly one
    // child that is a direct column reference. This check also guards the cast below.
    if (children.size() != 1 || !(children.get(0) instanceof ExprNodeColumnDesc)) {
        return null;
    }
    final String columnName = ((ExprNodeColumnDesc) children.get(0)).getColumn();

    // Map the cast UDF onto the Druid value type it produces; unsupported UDFs leave it null.
    final GenericUDF udf = funcDesc.getGenericUDF();
    ValueType targetType = null;
    if (udf instanceof GenericUDFBridge) {
        Class<? extends UDF> udfClass = ((GenericUDFBridge) udf).getUdfClass();
        if (udfClass.equals(UDFToDouble.class)) {
            targetType = ValueType.DOUBLE;
        } else if (udfClass.equals(UDFToFloat.class)) {
            targetType = ValueType.FLOAT;
        } else if (udfClass.equals(UDFToLong.class)) {
            targetType = ValueType.LONG;
        }
    } else if (udf instanceof GenericUDFToString) {
        targetType = ValueType.STRING;
    }
    if (targetType == null) {
        return null;
    }

    final String virtualColumnExpr = DruidQuery.format("CAST(%s, '%s')", columnName, targetType.toString());
    for (VirtualColumn column : virtualColumns) {
        if (column instanceof ExpressionVirtualColumn && ((ExpressionVirtualColumn) column).getExpression().equals(virtualColumnExpr)) {
            // Found an existing virtual column with same expression, no need to add another virtual column
            return column.getOutputName();
        }
    }

    // No match: register a new virtual column under a name not used by any existing one.
    final Set<String> usedColumnNames = virtualColumns.stream().map(VirtualColumn::getOutputName).collect(Collectors.toSet());
    final String name = SqlValidatorUtil.uniquify("vc", usedColumnNames, SqlValidatorUtil.EXPR_SUGGESTER);
    virtualColumns.add(new ExpressionVirtualColumn(name, virtualColumnExpr, targetType, ExprMacroTable.nil()));
    return name;
}
Also used : Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) HttpMethod(org.jboss.netty.handler.codec.http.HttpMethod) IndexSpec(org.apache.druid.segment.IndexSpec) FileStatus(org.apache.hadoop.fs.FileStatus) FunctionRegistry(org.apache.hadoop.hive.ql.exec.FunctionRegistry) StringFullResponseHandler(org.apache.druid.java.util.http.client.response.StringFullResponseHandler) HdfsDataSegmentPusherConfig(org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig) Pair(org.apache.druid.java.util.common.Pair) InetAddress(java.net.InetAddress) SqlValidatorUtil(org.apache.calcite.sql.validate.SqlValidatorUtil) JodaUtils(org.apache.druid.java.util.common.JodaUtils) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) ExprNodeEvaluatorFactory(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory) TimestampParseExprMacro(org.apache.druid.query.expression.TimestampParseExprMacro) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) InputStreamResponseHandler(org.apache.druid.java.util.http.client.response.InputStreamResponseHandler) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Set(java.util.Set) ValueType(org.apache.druid.segment.column.ValueType) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) DimFilter(org.apache.druid.query.filter.DimFilter) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) ISOChronology(org.joda.time.chrono.ISOChronology) BloomFilterSerializersModule(org.apache.druid.guice.BloomFilterSerializersModule) 
DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) Granularity(org.apache.druid.java.util.common.granularity.Granularity) HttpClient(org.apache.druid.java.util.http.client.HttpClient) UDFToLong(org.apache.hadoop.hive.ql.udf.UDFToLong) ScanQuery(org.apache.druid.query.scan.ScanQuery) TopNQuery(org.apache.druid.query.topn.TopNQuery) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) StringComparators(org.apache.druid.query.ordering.StringComparators) ResultIterator(org.skife.jdbi.v2.ResultIterator) DruidQuery(org.apache.calcite.adapter.druid.DruidQuery) Constants(org.apache.hadoop.hive.conf.Constants) Nullable(javax.annotation.Nullable) RegexpExtractExprMacro(org.apache.druid.query.expression.RegexpExtractExprMacro) HttpHeaders(org.jboss.netty.handler.codec.http.HttpHeaders) Properties(java.util.Properties) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) StringComparator(org.apache.druid.query.ordering.StringComparator) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) VirtualColumn(org.apache.druid.segment.VirtualColumn) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) UDFToDouble(org.apache.hadoop.hive.ql.udf.UDFToDouble) Throwables(com.google.common.base.Throwables) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) GenericUDFInBloomFilter(org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter) IOException(java.io.IOException) NoneShardSpec(org.apache.druid.timeline.partition.NoneShardSpec) 
UnknownHostException(java.net.UnknownHostException) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) Table(org.apache.hadoop.hive.metastore.api.Table) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) ExecutionException(java.util.concurrent.ExecutionException) HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) TimestampExtractExprMacro(org.apache.druid.query.expression.TimestampExtractExprMacro) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) RetryProxy(org.apache.hadoop.io.retry.RetryProxy) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) URL(java.net.URL) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) AvroStreamInputRowParser(org.apache.hadoop.hive.druid.json.AvroStreamInputRowParser) LoggerFactory(org.slf4j.LoggerFactory) RetryPolicies(org.apache.hadoop.io.retry.RetryPolicies) TmpFileSegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory) ByteBuffer(java.nio.ByteBuffer) UDF(org.apache.hadoop.hive.ql.exec.UDF) Druids(org.apache.druid.query.Druids) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) TimestampFormatExprMacro(org.apache.druid.query.expression.TimestampFormatExprMacro) LikeExprMacro(org.apache.druid.query.expression.LikeExprMacro) Path(org.apache.hadoop.fs.Path) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) ByteArrayMapper(org.skife.jdbi.v2.util.ByteArrayMapper) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) ImmutableSet(com.google.common.collect.ImmutableSet) MetadataStorageTablesConfig(org.apache.druid.metadata.MetadataStorageTablesConfig) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) 
AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TimestampCeilExprMacro(org.apache.druid.query.expression.TimestampCeilExprMacro) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) Interner(com.google.common.collect.Interner) BloomKFilterHolder(org.apache.druid.query.filter.BloomKFilterHolder) Collectors(java.util.stream.Collectors) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) Objects(java.util.Objects) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MapUtils(org.apache.druid.java.util.common.MapUtils) ExprNodeDynamicValueEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeDynamicValueEvaluator) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) TimestampShiftExprMacro(org.apache.druid.query.expression.TimestampShiftExprMacro) UDFToFloat(org.apache.hadoop.hive.ql.udf.UDFToFloat) AvroParseSpec(org.apache.hadoop.hive.druid.json.AvroParseSpec) ImmutableList(com.google.common.collect.ImmutableList) NoopEmitter(org.apache.druid.java.util.emitter.core.NoopEmitter) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Request(org.apache.druid.java.util.http.client.Request) PrimitiveObjectInspectorUtils(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) BytesWritable(org.apache.hadoop.io.BytesWritable) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) IndexMergerV9(org.apache.druid.segment.IndexMergerV9) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) 
GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) OutputStream(java.io.OutputStream) SQLMetadataConnector(org.apache.druid.metadata.SQLMetadataConnector) Period(org.joda.time.Period) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VirtualColumns(org.apache.druid.segment.VirtualColumns) Logger(org.slf4j.Logger) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) TimestampFloorExprMacro(org.apache.druid.query.expression.TimestampFloorExprMacro) TypeInfoFactory(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory) Iterator(java.util.Iterator) Folder3(org.skife.jdbi.v2.Folder3) MalformedURLException(java.net.MalformedURLException) MySQLConnector(org.apache.druid.metadata.storage.mysql.MySQLConnector) TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) HiveConf(org.apache.hadoop.hive.conf.HiveConf) DateTime(org.joda.time.DateTime) Interners(com.google.common.collect.Interners) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) Query(org.skife.jdbi.v2.Query) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) TimeUnit(java.util.concurrent.TimeUnit) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory) Handle(org.skife.jdbi.v2.Handle) Ordering(com.google.common.collect.Ordering) NullHandling(org.apache.druid.common.config.NullHandling) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) BloomDimFilter(org.apache.druid.query.filter.BloomDimFilter) TrimExprMacro(org.apache.druid.query.expression.TrimExprMacro) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) FloatSumAggregatorFactory(org.apache.druid.query.aggregation.FloatSumAggregatorFactory) 
OrDimFilter(org.apache.druid.query.filter.OrDimFilter) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) InputStream(java.io.InputStream) ValueType(org.apache.druid.segment.column.ValueType) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) UDFToFloat(org.apache.hadoop.hive.ql.udf.UDFToFloat) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VirtualColumn(org.apache.druid.segment.VirtualColumn) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) Nullable(javax.annotation.Nullable)

Aggregations

VirtualColumns (org.apache.druid.segment.VirtualColumns)22 Test (org.junit.Test)19 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)14 Nullable (javax.annotation.Nullable)10 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)10 ImmutableList (com.google.common.collect.ImmutableList)9 List (java.util.List)9 BaseFloatColumnValueSelector (org.apache.druid.segment.BaseFloatColumnValueSelector)9 BaseLongColumnValueSelector (org.apache.druid.segment.BaseLongColumnValueSelector)9 BaseObjectColumnValueSelector (org.apache.druid.segment.BaseObjectColumnValueSelector)9 DimensionSelector (org.apache.druid.segment.DimensionSelector)9 VirtualColumn (org.apache.druid.segment.VirtualColumn)9 ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities)9 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)8 Arrays (java.util.Arrays)8 TestExprMacroTable (org.apache.druid.query.expression.TestExprMacroTable)8 ColumnInspector (org.apache.druid.segment.ColumnInspector)8 ColumnValueSelector (org.apache.druid.segment.ColumnValueSelector)8 ExtractionDimensionSpec (org.apache.druid.query.dimension.ExtractionDimensionSpec)7 BucketExtractionFn (org.apache.druid.query.extraction.BucketExtractionFn)7