Search in sources :

Example 21 with GenericUDFBridge

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge in project hive by apache.

the class TestUDFUUID method testUUID.

/**
 * Checks that two consecutive {@code UDFUUID.evaluate()} calls return different
 * 36-character strings, and that {@code FunctionRegistry} reports the bridged
 * {@code uuid} function as non-deterministic.
 */
@Test
public void testUUID() throws Exception {
    UDFUUID udf = new UDFUUID();
    String id1 = udf.evaluate().toString();
    String id2 = udf.evaluate().toString();
    // Back-to-back evaluations must not yield the same identifier.
    assertFalse(id1.equals(id2));
    // assertEquals takes (expected, actual); keeping that order makes a failure
    // message report "expected 36 but was N" rather than the reverse.
    assertEquals(36, id1.length());
    assertEquals(36, id2.length());
    // Wrap the old-style UDF in a bridge (second argument: not an operator) and
    // verify the registry does not consider it deterministic.
    GenericUDFBridge bridge = new GenericUDFBridge("uuid", false, UDFUUID.class.getName());
    assertFalse(FunctionRegistry.isDeterministic(bridge));
}
Also used : GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) Test(org.junit.Test)

Example 22 with GenericUDFBridge

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge in project hive by apache.

the class DruidStorageHandlerUtils method extractColName.

/**
 * Resolves the Druid column name that corresponds to {@code expr}.
 *
 * <ul>
 *   <li>If the expression's type is not in {@code druidSupportedTypeInfos}, returns {@code null}.</li>
 *   <li>If the expression is a plain column reference, returns that column's name.</li>
 *   <li>If the expression is a function with exactly one child that is a column reference, and
 *       the function is a cast to double, float, long or string, returns the output name of a
 *       Druid virtual column computing {@code CAST(column, 'TYPE')}. An existing virtual column
 *       with the same expression is reused; otherwise a new one is created.</li>
 *   <li>In every other case, returns {@code null}.</li>
 * </ul>
 *
 * <p>Side effect: when a new virtual column is created it is appended to {@code virtualColumns}.
 *
 * @param expr the expression to resolve
 * @param virtualColumns the virtual columns collected so far; may be appended to
 * @return the column (or virtual column) name, or {@code null} if the expression is unsupported
 */
@Nullable
public static String extractColName(ExprNodeDesc expr, List<VirtualColumn> virtualColumns) {
    if (!druidSupportedTypeInfos.contains(expr.getTypeInfo())) {
        // We cannot pass the bloom filter to druid since bloom filter tests for exact object bytes.
        return null;
    }
    if (expr instanceof ExprNodeColumnDesc) {
        return ((ExprNodeColumnDesc) expr).getColumn();
    }
    if (!(expr instanceof ExprNodeGenericFuncDesc)) {
        return null;
    }
    ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) expr;
    GenericUDF udf = funcDesc.getGenericUDF();
    // Bail out if it is not a simple cast expression, i.e. anything other than a function
    // applied to exactly one column reference. This guard must reject those shapes because
    // the child is cast to ExprNodeColumnDesc unconditionally right below.
    List<ExprNodeDesc> children = funcDesc.getChildren();
    if (children == null || children.size() != 1 || !(children.get(0) instanceof ExprNodeColumnDesc)) {
        return null;
    }
    String columnName = ((ExprNodeColumnDesc) children.get(0)).getColumn();
    // Map the Hive cast UDF to the Druid value type it produces.
    ValueType targetType = null;
    if (udf instanceof GenericUDFBridge) {
        Class<? extends UDF> udfClass = ((GenericUDFBridge) udf).getUdfClass();
        if (udfClass.equals(UDFToDouble.class)) {
            targetType = ValueType.DOUBLE;
        } else if (udfClass.equals(UDFToFloat.class)) {
            targetType = ValueType.FLOAT;
        } else if (udfClass.equals(UDFToLong.class)) {
            targetType = ValueType.LONG;
        }
    } else if (udf instanceof GenericUDFToString) {
        targetType = ValueType.STRING;
    }
    if (targetType == null) {
        // Not one of the recognised cast functions.
        return null;
    }
    String virtualColumnExpr = DruidQuery.format("CAST(%s, '%s')", columnName, targetType.toString());
    for (VirtualColumn column : virtualColumns) {
        if (column instanceof ExpressionVirtualColumn && ((ExpressionVirtualColumn) column).getExpression().equals(virtualColumnExpr)) {
            // Found an existing virtual column with same expression, no need to add another virtual column
            return column.getOutputName();
        }
    }
    // No reusable virtual column: register a new one under a name not yet taken.
    Set<String> usedColumnNames = virtualColumns.stream().map(VirtualColumn::getOutputName).collect(Collectors.toSet());
    final String name = SqlValidatorUtil.uniquify("vc", usedColumnNames, SqlValidatorUtil.EXPR_SUGGESTER);
    ExpressionVirtualColumn expressionVirtualColumn = new ExpressionVirtualColumn(name, virtualColumnExpr, targetType, ExprMacroTable.nil());
    virtualColumns.add(expressionVirtualColumn);
    return name;
}
Also used : Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) HttpMethod(org.jboss.netty.handler.codec.http.HttpMethod) IndexSpec(org.apache.druid.segment.IndexSpec) FileStatus(org.apache.hadoop.fs.FileStatus) FunctionRegistry(org.apache.hadoop.hive.ql.exec.FunctionRegistry) StringFullResponseHandler(org.apache.druid.java.util.http.client.response.StringFullResponseHandler) HdfsDataSegmentPusherConfig(org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig) Pair(org.apache.druid.java.util.common.Pair) InetAddress(java.net.InetAddress) SqlValidatorUtil(org.apache.calcite.sql.validate.SqlValidatorUtil) JodaUtils(org.apache.druid.java.util.common.JodaUtils) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) ExprNodeEvaluatorFactory(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory) TimestampParseExprMacro(org.apache.druid.query.expression.TimestampParseExprMacro) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) InputStreamResponseHandler(org.apache.druid.java.util.http.client.response.InputStreamResponseHandler) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Set(java.util.Set) ValueType(org.apache.druid.segment.column.ValueType) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) DimFilter(org.apache.druid.query.filter.DimFilter) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) ISOChronology(org.joda.time.chrono.ISOChronology) BloomFilterSerializersModule(org.apache.druid.guice.BloomFilterSerializersModule) 
DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) Granularity(org.apache.druid.java.util.common.granularity.Granularity) HttpClient(org.apache.druid.java.util.http.client.HttpClient) UDFToLong(org.apache.hadoop.hive.ql.udf.UDFToLong) ScanQuery(org.apache.druid.query.scan.ScanQuery) TopNQuery(org.apache.druid.query.topn.TopNQuery) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) StringComparators(org.apache.druid.query.ordering.StringComparators) ResultIterator(org.skife.jdbi.v2.ResultIterator) DruidQuery(org.apache.calcite.adapter.druid.DruidQuery) Constants(org.apache.hadoop.hive.conf.Constants) Nullable(javax.annotation.Nullable) RegexpExtractExprMacro(org.apache.druid.query.expression.RegexpExtractExprMacro) HttpHeaders(org.jboss.netty.handler.codec.http.HttpHeaders) Properties(java.util.Properties) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) StringComparator(org.apache.druid.query.ordering.StringComparator) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) VirtualColumn(org.apache.druid.segment.VirtualColumn) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) UDFToDouble(org.apache.hadoop.hive.ql.udf.UDFToDouble) Throwables(com.google.common.base.Throwables) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) GenericUDFInBloomFilter(org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter) IOException(java.io.IOException) NoneShardSpec(org.apache.druid.timeline.partition.NoneShardSpec) 
UnknownHostException(java.net.UnknownHostException) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) Table(org.apache.hadoop.hive.metastore.api.Table) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) ExecutionException(java.util.concurrent.ExecutionException) HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) TimestampExtractExprMacro(org.apache.druid.query.expression.TimestampExtractExprMacro) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) RetryProxy(org.apache.hadoop.io.retry.RetryProxy) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) URL(java.net.URL) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) AvroStreamInputRowParser(org.apache.hadoop.hive.druid.json.AvroStreamInputRowParser) LoggerFactory(org.slf4j.LoggerFactory) RetryPolicies(org.apache.hadoop.io.retry.RetryPolicies) TmpFileSegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory) ByteBuffer(java.nio.ByteBuffer) UDF(org.apache.hadoop.hive.ql.exec.UDF) Druids(org.apache.druid.query.Druids) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) TimestampFormatExprMacro(org.apache.druid.query.expression.TimestampFormatExprMacro) LikeExprMacro(org.apache.druid.query.expression.LikeExprMacro) Path(org.apache.hadoop.fs.Path) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) ByteArrayMapper(org.skife.jdbi.v2.util.ByteArrayMapper) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) ImmutableSet(com.google.common.collect.ImmutableSet) MetadataStorageTablesConfig(org.apache.druid.metadata.MetadataStorageTablesConfig) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) 
AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TimestampCeilExprMacro(org.apache.druid.query.expression.TimestampCeilExprMacro) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) Interner(com.google.common.collect.Interner) BloomKFilterHolder(org.apache.druid.query.filter.BloomKFilterHolder) Collectors(java.util.stream.Collectors) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) Objects(java.util.Objects) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MapUtils(org.apache.druid.java.util.common.MapUtils) ExprNodeDynamicValueEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeDynamicValueEvaluator) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) TimestampShiftExprMacro(org.apache.druid.query.expression.TimestampShiftExprMacro) UDFToFloat(org.apache.hadoop.hive.ql.udf.UDFToFloat) AvroParseSpec(org.apache.hadoop.hive.druid.json.AvroParseSpec) ImmutableList(com.google.common.collect.ImmutableList) NoopEmitter(org.apache.druid.java.util.emitter.core.NoopEmitter) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Request(org.apache.druid.java.util.http.client.Request) PrimitiveObjectInspectorUtils(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) BytesWritable(org.apache.hadoop.io.BytesWritable) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) IndexMergerV9(org.apache.druid.segment.IndexMergerV9) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) 
GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) OutputStream(java.io.OutputStream) SQLMetadataConnector(org.apache.druid.metadata.SQLMetadataConnector) Period(org.joda.time.Period) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VirtualColumns(org.apache.druid.segment.VirtualColumns) Logger(org.slf4j.Logger) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) TimestampFloorExprMacro(org.apache.druid.query.expression.TimestampFloorExprMacro) TypeInfoFactory(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory) Iterator(java.util.Iterator) Folder3(org.skife.jdbi.v2.Folder3) MalformedURLException(java.net.MalformedURLException) MySQLConnector(org.apache.druid.metadata.storage.mysql.MySQLConnector) TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) HiveConf(org.apache.hadoop.hive.conf.HiveConf) DateTime(org.joda.time.DateTime) Interners(com.google.common.collect.Interners) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) Query(org.skife.jdbi.v2.Query) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) TimeUnit(java.util.concurrent.TimeUnit) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory) Handle(org.skife.jdbi.v2.Handle) Ordering(com.google.common.collect.Ordering) NullHandling(org.apache.druid.common.config.NullHandling) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) BloomDimFilter(org.apache.druid.query.filter.BloomDimFilter) TrimExprMacro(org.apache.druid.query.expression.TrimExprMacro) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) FloatSumAggregatorFactory(org.apache.druid.query.aggregation.FloatSumAggregatorFactory) 
OrDimFilter(org.apache.druid.query.filter.OrDimFilter) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) InputStream(java.io.InputStream) ValueType(org.apache.druid.segment.column.ValueType) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) UDFToFloat(org.apache.hadoop.hive.ql.udf.UDFToFloat) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VirtualColumn(org.apache.druid.segment.VirtualColumn) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) Nullable(javax.annotation.Nullable)

Example 23 with GenericUDFBridge

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge in project hive by apache.

the class ConstantPropagateProcFactory method isDeterministicUdf.

/**
 * Decides whether {@code udf} can be treated as deterministic, based on its
 * {@code UDFType} annotation and on whether it declares required files or jars.
 *
 * <p>Returns {@code false} when the annotation marks the function as non-deterministic
 * (except for {@code GenericUDFUnixTimeStamp} called with at least one argument), when a
 * bridged UDF class cannot be loaded or instantiated, or when the function reports any
 * required files or jars.
 *
 * @param udf the function to inspect
 * @param children the argument expressions of the call; may be {@code null}
 * @return {@code true} if the function passes all of the checks above
 */
private static boolean isDeterministicUdf(GenericUDF udf, List<ExprNodeDesc> children) {
    // For a bridged old-style UDF the annotation is read from the wrapped UDF class
    // rather than from the bridge class itself.
    UDFType udfType = udf instanceof GenericUDFBridge
        ? ((GenericUDFBridge) udf).getUdfClass().getAnnotation(UDFType.class)
        : udf.getClass().getAnnotation(UDFType.class);
    if (!udfType.deterministic()) {
        // unix_timestamp is polymorphic (ignore class annotations)
        return udf.getClass().equals(GenericUDFUnixTimeStamp.class) && children != null && children.size() > 0;
    }
    // If udf is requiring additional jars, we can't determine the result in
    // compile time.
    String[] files;
    String[] jars;
    if (udf instanceof GenericUDFBridge) {
        GenericUDFBridge bridge = (GenericUDFBridge) udf;
        String udfClassName = bridge.getUdfClassName();
        try {
            UDF udfInternal = (UDF) Class.forName(udfClassName, true, Utilities.getSessionSpecifiedClassLoader()).newInstance();
            files = udfInternal.getRequiredFiles();
            jars = udfInternal.getRequiredJars();
        } catch (Exception e) {
            // Keep the cause in the log: the failure may be a missing class, but also an
            // instantiation or access problem, and the fixed message alone cannot tell them apart.
            LOG.error("The UDF implementation class '" + udfClassName + "' is not present in the class path", e);
            return false;
        }
    } else {
        files = udf.getRequiredFiles();
        jars = udf.getRequiredJars();
    }
    return files == null && jars == null;
}
Also used : UDF(org.apache.hadoop.hive.ql.exec.UDF) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) UDFType(org.apache.hadoop.hive.ql.udf.UDFType) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException)

Example 24 with GenericUDFBridge

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge in project drill by apache.

the class HiveFunctionRegistry method matchAndCreateUDFHolder.

/**
 * Tries to bind the old-style Hive UDF class {@code udfClazz} to the given argument
 * inspectors by wrapping it in a {@code GenericUDFBridge} and initializing it.
 *
 * @param udfName  name under which the function is registered
 * @param udfClazz the Hive UDF implementation class
 * @param argTypes argument types, passed through to the created holder
 * @param argOIs   argument object inspectors used to initialize the bridge
 * @return a holder describing the bound function, or {@code null} if any step threw
 */
private HiveFuncHolder matchAndCreateUDFHolder(String udfName, Class<? extends UDF> udfClazz, MajorType[] argTypes, ObjectInspector[] argOIs) {
    HiveFuncHolder holder = null;
    try {
        final boolean isOperator = false;
        final String implClassName = udfClazz.getName();
        GenericUDF bridge = new GenericUDFBridge(udfName, isOperator, implClassName);
        ObjectInspector resultInspector = bridge.initialize(argOIs);
        holder = new HiveFuncHolder(
            udfName,
            udfClazz,
            argTypes,
            resultInspector,
            Types.optional(ObjectInspectorHelper.getDrillType(resultInspector)),
            nonDeterministicUDFs.contains(udfClazz));
    } catch (Exception ignored) {
        // Deliberately ignored: a failure here is reported to the caller as "no match"
        // through the null return value.
    }
    return holder;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) UserException(org.apache.drill.common.exceptions.UserException)

Aggregations

GenericUDFBridge (org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge)24 GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)12 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)10 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)10 ArrayList (java.util.ArrayList)7 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)7 Test (org.junit.Test)6 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)5 Description (org.apache.hadoop.hive.ql.exec.Description)4 UDF (org.apache.hadoop.hive.ql.exec.UDF)4 GenericUDFToUnixTimeStamp (org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp)4 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)4 UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException)3 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)3 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)3 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)3 ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator)2 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)2 UDFYear (org.apache.hadoop.hive.ql.udf.UDFYear)2 GenericUDFToBinary (org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary)2