Search in sources :

Example 6 with UDF

Use of org.apache.hadoop.hive.ql.exec.UDF in the apache/hive project.

The method extractColName of the class DruidStorageHandlerUtils.

/**
 * Extracts the Druid column name that corresponds to the given Hive expression.
 *
 * <p>Handles two shapes of expression:
 * <ul>
 *   <li>a plain column reference — the column name is returned directly;</li>
 *   <li>a simple cast of a single column (e.g. {@code CAST(col AS DOUBLE)}) — a Druid
 *       {@link ExpressionVirtualColumn} implementing the cast is looked up in (or added to)
 *       {@code virtualColumns}, and its output name is returned.</li>
 * </ul>
 *
 * @param expr           the Hive expression to inspect
 * @param virtualColumns mutable list of virtual columns already registered for the query;
 *                       may be appended to as a side effect when a new cast column is needed
 * @return the Druid column name to use, or {@code null} if the expression is not a plain
 *         column or a supported simple cast (callers then skip pushing the filter to Druid)
 */
@Nullable
public static String extractColName(ExprNodeDesc expr, List<VirtualColumn> virtualColumns) {
    if (!druidSupportedTypeInfos.contains(expr.getTypeInfo())) {
        // We cannot pass the bloom filter to druid since bloom filter tests for exact object bytes.
        return null;
    }
    if (expr instanceof ExprNodeColumnDesc) {
        return ((ExprNodeColumnDesc) expr).getColumn();
    }
    ExprNodeGenericFuncDesc funcDesc = null;
    if (expr instanceof ExprNodeGenericFuncDesc) {
        funcDesc = (ExprNodeGenericFuncDesc) expr;
    }
    if (null == funcDesc) {
        return null;
    }
    GenericUDF udf = funcDesc.getGenericUDF();
    // Bail out if it's not a simple cast expression: exactly one child, and that child must be
    // a plain column reference. (Condition was previously inverted, which rejected the supported
    // case and let unsupported expressions fall through to the unguarded cast below.)
    if (funcDesc.getChildren().size() != 1 || !(funcDesc.getChildren().get(0) instanceof ExprNodeColumnDesc)) {
        return null;
    }
    String columnName = ((ExprNodeColumnDesc) (funcDesc.getChildren().get(0))).getColumn();
    // Map the Hive cast UDF to the Druid value type it produces; null means "unsupported cast".
    ValueType targetType = null;
    if (udf instanceof GenericUDFBridge) {
        Class<? extends UDF> udfClass = ((GenericUDFBridge) udf).getUdfClass();
        if (udfClass.equals(UDFToDouble.class)) {
            targetType = ValueType.DOUBLE;
        } else if (udfClass.equals(UDFToFloat.class)) {
            targetType = ValueType.FLOAT;
        } else if (udfClass.equals(UDFToLong.class)) {
            targetType = ValueType.LONG;
        }
    } else if (udf instanceof GenericUDFToString) {
        targetType = ValueType.STRING;
    }
    if (targetType == null) {
        return null;
    }
    // Druid expression implementing the cast, e.g. CAST(col, 'DOUBLE').
    String virtualColumnExpr = DruidQuery.format("CAST(%s, '%s')", columnName, targetType.toString());
    for (VirtualColumn column : virtualColumns) {
        if (column instanceof ExpressionVirtualColumn && ((ExpressionVirtualColumn) column).getExpression().equals(virtualColumnExpr)) {
            // Found an existing virtual column with same expression, no need to add another virtual column
            return column.getOutputName();
        }
    }
    // No match: mint a unique output name ("vc", "vc0", ...) and register a new virtual column.
    Set<String> usedColumnNames = virtualColumns.stream().map(col -> col.getOutputName()).collect(Collectors.toSet());
    final String name = SqlValidatorUtil.uniquify("vc", usedColumnNames, SqlValidatorUtil.EXPR_SUGGESTER);
    ExpressionVirtualColumn expressionVirtualColumn = new ExpressionVirtualColumn(name, virtualColumnExpr, targetType, ExprMacroTable.nil());
    virtualColumns.add(expressionVirtualColumn);
    return name;
}
Also used : Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) HttpMethod(org.jboss.netty.handler.codec.http.HttpMethod) IndexSpec(org.apache.druid.segment.IndexSpec) FileStatus(org.apache.hadoop.fs.FileStatus) FunctionRegistry(org.apache.hadoop.hive.ql.exec.FunctionRegistry) StringFullResponseHandler(org.apache.druid.java.util.http.client.response.StringFullResponseHandler) HdfsDataSegmentPusherConfig(org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig) Pair(org.apache.druid.java.util.common.Pair) InetAddress(java.net.InetAddress) SqlValidatorUtil(org.apache.calcite.sql.validate.SqlValidatorUtil) JodaUtils(org.apache.druid.java.util.common.JodaUtils) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) ExprNodeEvaluatorFactory(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory) TimestampParseExprMacro(org.apache.druid.query.expression.TimestampParseExprMacro) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) InputStreamResponseHandler(org.apache.druid.java.util.http.client.response.InputStreamResponseHandler) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Set(java.util.Set) ValueType(org.apache.druid.segment.column.ValueType) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) DimFilter(org.apache.druid.query.filter.DimFilter) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) ISOChronology(org.joda.time.chrono.ISOChronology) BloomFilterSerializersModule(org.apache.druid.guice.BloomFilterSerializersModule) 
DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) Granularity(org.apache.druid.java.util.common.granularity.Granularity) HttpClient(org.apache.druid.java.util.http.client.HttpClient) UDFToLong(org.apache.hadoop.hive.ql.udf.UDFToLong) ScanQuery(org.apache.druid.query.scan.ScanQuery) TopNQuery(org.apache.druid.query.topn.TopNQuery) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) StringComparators(org.apache.druid.query.ordering.StringComparators) ResultIterator(org.skife.jdbi.v2.ResultIterator) DruidQuery(org.apache.calcite.adapter.druid.DruidQuery) Constants(org.apache.hadoop.hive.conf.Constants) Nullable(javax.annotation.Nullable) RegexpExtractExprMacro(org.apache.druid.query.expression.RegexpExtractExprMacro) HttpHeaders(org.jboss.netty.handler.codec.http.HttpHeaders) Properties(java.util.Properties) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) StringComparator(org.apache.druid.query.ordering.StringComparator) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) VirtualColumn(org.apache.druid.segment.VirtualColumn) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) UDFToDouble(org.apache.hadoop.hive.ql.udf.UDFToDouble) Throwables(com.google.common.base.Throwables) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) GenericUDFInBloomFilter(org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter) IOException(java.io.IOException) NoneShardSpec(org.apache.druid.timeline.partition.NoneShardSpec) 
UnknownHostException(java.net.UnknownHostException) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) Table(org.apache.hadoop.hive.metastore.api.Table) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) ExecutionException(java.util.concurrent.ExecutionException) HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) TimestampExtractExprMacro(org.apache.druid.query.expression.TimestampExtractExprMacro) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) RetryProxy(org.apache.hadoop.io.retry.RetryProxy) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) URL(java.net.URL) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) AvroStreamInputRowParser(org.apache.hadoop.hive.druid.json.AvroStreamInputRowParser) LoggerFactory(org.slf4j.LoggerFactory) RetryPolicies(org.apache.hadoop.io.retry.RetryPolicies) TmpFileSegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory) ByteBuffer(java.nio.ByteBuffer) UDF(org.apache.hadoop.hive.ql.exec.UDF) Druids(org.apache.druid.query.Druids) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) TimestampFormatExprMacro(org.apache.druid.query.expression.TimestampFormatExprMacro) LikeExprMacro(org.apache.druid.query.expression.LikeExprMacro) Path(org.apache.hadoop.fs.Path) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) ByteArrayMapper(org.skife.jdbi.v2.util.ByteArrayMapper) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) ImmutableSet(com.google.common.collect.ImmutableSet) MetadataStorageTablesConfig(org.apache.druid.metadata.MetadataStorageTablesConfig) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) 
AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TimestampCeilExprMacro(org.apache.druid.query.expression.TimestampCeilExprMacro) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) Interner(com.google.common.collect.Interner) BloomKFilterHolder(org.apache.druid.query.filter.BloomKFilterHolder) Collectors(java.util.stream.Collectors) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) Objects(java.util.Objects) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MapUtils(org.apache.druid.java.util.common.MapUtils) ExprNodeDynamicValueEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeDynamicValueEvaluator) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) TimestampShiftExprMacro(org.apache.druid.query.expression.TimestampShiftExprMacro) UDFToFloat(org.apache.hadoop.hive.ql.udf.UDFToFloat) AvroParseSpec(org.apache.hadoop.hive.druid.json.AvroParseSpec) ImmutableList(com.google.common.collect.ImmutableList) NoopEmitter(org.apache.druid.java.util.emitter.core.NoopEmitter) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Request(org.apache.druid.java.util.http.client.Request) PrimitiveObjectInspectorUtils(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) BytesWritable(org.apache.hadoop.io.BytesWritable) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) IndexMergerV9(org.apache.druid.segment.IndexMergerV9) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) 
GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) OutputStream(java.io.OutputStream) SQLMetadataConnector(org.apache.druid.metadata.SQLMetadataConnector) Period(org.joda.time.Period) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VirtualColumns(org.apache.druid.segment.VirtualColumns) Logger(org.slf4j.Logger) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) TimestampFloorExprMacro(org.apache.druid.query.expression.TimestampFloorExprMacro) TypeInfoFactory(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory) Iterator(java.util.Iterator) Folder3(org.skife.jdbi.v2.Folder3) MalformedURLException(java.net.MalformedURLException) MySQLConnector(org.apache.druid.metadata.storage.mysql.MySQLConnector) TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) HiveConf(org.apache.hadoop.hive.conf.HiveConf) DateTime(org.joda.time.DateTime) Interners(com.google.common.collect.Interners) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) Query(org.skife.jdbi.v2.Query) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) TimeUnit(java.util.concurrent.TimeUnit) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory) Handle(org.skife.jdbi.v2.Handle) Ordering(com.google.common.collect.Ordering) NullHandling(org.apache.druid.common.config.NullHandling) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) BloomDimFilter(org.apache.druid.query.filter.BloomDimFilter) TrimExprMacro(org.apache.druid.query.expression.TrimExprMacro) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) FloatSumAggregatorFactory(org.apache.druid.query.aggregation.FloatSumAggregatorFactory) 
OrDimFilter(org.apache.druid.query.filter.OrDimFilter) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) InputStream(java.io.InputStream) ValueType(org.apache.druid.segment.column.ValueType) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) UDFToFloat(org.apache.hadoop.hive.ql.udf.UDFToFloat) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VirtualColumn(org.apache.druid.segment.VirtualColumn) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) Nullable(javax.annotation.Nullable)

Aggregations

UDF (org.apache.hadoop.hive.ql.exec.UDF)6 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)5 GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)5 GenericUDFBridge (org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge)5 UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException)3 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)3 List (java.util.List)2 ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator)2 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)2 QualifiedObjectName (com.facebook.presto.common.QualifiedObjectName)1 Type (com.facebook.presto.common.type.Type)1 TypeManager (com.facebook.presto.common.type.TypeManager)1 TypeSignature (com.facebook.presto.common.type.TypeSignature)1 HiveFunctionErrorCode.executionError (com.facebook.presto.hive.functions.HiveFunctionErrorCode.executionError)1 HiveFunctionErrorCode.initializationError (com.facebook.presto.hive.functions.HiveFunctionErrorCode.initializationError)1 HiveFunctionErrorCode.unsupportedFunctionType (com.facebook.presto.hive.functions.HiveFunctionErrorCode.unsupportedFunctionType)1 ObjectEncoder (com.facebook.presto.hive.functions.type.ObjectEncoder)1 ObjectEncoders.createEncoder (com.facebook.presto.hive.functions.type.ObjectEncoders.createEncoder)1 ObjectInputDecoder (com.facebook.presto.hive.functions.type.ObjectInputDecoder)1 ObjectInputDecoders.createDecoder (com.facebook.presto.hive.functions.type.ObjectInputDecoders.createDecoder)1