Search in sources :

Example 1 with GenericUDFToString

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString in project hive by apache.

From the class TestAccumuloRangeGenerator, method testCastExpression.

@Test
public void testCastExpression() throws Exception {
    // Integer literals 40 and 50.
    ExprNodeDesc lowerOperand = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 40);
    ExprNodeDesc upperOperand = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 50);
    // Addition: 40 + 50
    ExprNodeGenericFuncDesc sum = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
        new GenericUDFOPPlus(), Arrays.asList(lowerOperand, upperOperand));
    // Wrap the sum in a string cast: cast(40 + 50 as string)
    ExprNodeGenericFuncDesc castExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
        new GenericUDFToString(), "cast", Collections.<ExprNodeDesc>singletonList(sum));
    // Predicate over the rowid column: key >= cast(40 + 50 as string)
    ExprNodeDesc keyColumn = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", null, false);
    ExprNodeGenericFuncDesc predicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
        new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(keyColumn, castExpr));
    // Walk the expression tree, letting the range generator process each node.
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "key");
    SemanticDispatcher dispatcher = new DefaultRuleDispatcher(rangeGenerator,
        Collections.<SemanticRule, SemanticNodeProcessor>emptyMap(), null);
    SemanticGraphWalker walker = new DefaultGraphWalker(dispatcher);
    ArrayList<Node> rootNodes = new ArrayList<Node>();
    rootNodes.add(predicate);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        walker.startWalking(rootNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    // Don't fail -- would be better to actually compute a range of [90,+inf)
    Assert.assertNull(nodeOutput.get(predicate));
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) GenericUDFOPPlus(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Test(org.junit.Test)

Example 2 with GenericUDFToString

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString in project hive by apache.

From the class IndexPredicateAnalyzer, method getColumnExpr.

/**
 * Peels a simple cast off an expression to expose the column underneath.
 * If {@code expr} is a one-argument cast UDF applied directly to an
 * {@link ExprNodeColumnDesc}, the inner column descriptor is returned;
 * in every other case {@code expr} is returned unchanged.
 */
private static ExprNodeDesc getColumnExpr(ExprNodeDesc expr) {
    if (expr instanceof ExprNodeColumnDesc) {
        // Already a bare column reference; nothing to unwrap.
        return expr;
    }
    if (!(expr instanceof ExprNodeGenericFuncDesc)) {
        // Not a function call, so it cannot be a cast wrapper.
        return expr;
    }
    ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) expr;
    GenericUDF udf = func.getGenericUDF();
    // Recognize the family of simple cast UDFs.
    boolean isCastUdf = udf instanceof GenericUDFBridge
        || udf instanceof GenericUDFToBinary
        || udf instanceof GenericUDFToString
        || udf instanceof GenericUDFToChar
        || udf instanceof GenericUDFToVarchar
        || udf instanceof GenericUDFToDecimal
        || udf instanceof GenericUDFToDate
        || udf instanceof GenericUDFToUnixTimeStamp
        || udf instanceof GenericUDFToUtcTimestamp;
    // Unwrap only when the cast has exactly one child and that child is a column.
    if (isCastUdf && func.getChildren().size() == 1
        && func.getChildren().get(0) instanceof ExprNodeColumnDesc) {
        return func.getChildren().get(0);
    }
    return expr;
}
Also used : GenericUDFToChar(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFToDecimal(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal) GenericUDFToUnixTimeStamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) GenericUDFToBinary(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary) GenericUDFToDate(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate) GenericUDFToUtcTimestamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUtcTimestamp) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) GenericUDFToVarchar(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar)

Example 3 with GenericUDFToString

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString in project hive by apache.

From the class DruidStorageHandlerUtils, method extractColName.

/**
 * Resolves {@code expr} to a Druid column name usable in a query.
 * A bare column reference maps to its own name; a simple cast of a single
 * column (to double/float/long via {@link GenericUDFBridge}, or to string via
 * {@link GenericUDFToString}) is mapped to a virtual column implementing the
 * cast, reusing an existing virtual column with the same expression when one
 * is present, otherwise appending a freshly-named one to {@code virtualColumns}.
 *
 * @param expr           expression to resolve
 * @param virtualColumns mutable list of virtual columns; may be appended to
 * @return the (possibly virtual) column name, or {@code null} if the
 *         expression cannot be pushed to Druid
 */
@Nullable
public static String extractColName(ExprNodeDesc expr, List<VirtualColumn> virtualColumns) {
    if (!druidSupportedTypeInfos.contains(expr.getTypeInfo())) {
        // We cannot pass the bloom filter to druid since bloom filter tests for exact object bytes.
        return null;
    }
    if (expr instanceof ExprNodeColumnDesc) {
        return ((ExprNodeColumnDesc) expr).getColumn();
    }
    ExprNodeGenericFuncDesc funcDesc = null;
    if (expr instanceof ExprNodeGenericFuncDesc) {
        funcDesc = (ExprNodeGenericFuncDesc) expr;
    }
    if (null == funcDesc) {
        return null;
    }
    GenericUDF udf = funcDesc.getGenericUDF();
    // Bail out unless this is a simple cast of a single column reference.
    // BUG FIX: the condition was previously inverted -- it returned null exactly
    // when the expression WAS a simple cast, then fell through to an unconditional
    // ExprNodeColumnDesc cast below, risking a ClassCastException for any other shape.
    if (!(funcDesc.getChildren().size() == 1
        && funcDesc.getChildren().get(0) instanceof ExprNodeColumnDesc)) {
        return null;
    }
    String columnName = ((ExprNodeColumnDesc) (funcDesc.getChildren().get(0))).getColumn();
    ValueType targetType = null;
    if (udf instanceof GenericUDFBridge) {
        Class<? extends UDF> udfClass = ((GenericUDFBridge) udf).getUdfClass();
        if (udfClass.equals(UDFToDouble.class)) {
            targetType = ValueType.DOUBLE;
        } else if (udfClass.equals(UDFToFloat.class)) {
            targetType = ValueType.FLOAT;
        } else if (udfClass.equals(UDFToLong.class)) {
            targetType = ValueType.LONG;
        }
    } else if (udf instanceof GenericUDFToString) {
        targetType = ValueType.STRING;
    }
    if (targetType == null) {
        // Casts to any other type are not representable as Druid virtual columns here.
        return null;
    }
    String virtualColumnExpr = DruidQuery.format("CAST(%s, '%s')", columnName, targetType.toString());
    for (VirtualColumn column : virtualColumns) {
        if (column instanceof ExpressionVirtualColumn && ((ExpressionVirtualColumn) column).getExpression().equals(virtualColumnExpr)) {
            // Found an existing virtual column with same expression, no need to add another virtual column
            return column.getOutputName();
        }
    }
    // Mint a unique output name ("vc", "vc0", ...) not colliding with existing ones.
    Set<String> usedColumnNames = virtualColumns.stream().map(col -> col.getOutputName()).collect(Collectors.toSet());
    final String name = SqlValidatorUtil.uniquify("vc", usedColumnNames, SqlValidatorUtil.EXPR_SUGGESTER);
    ExpressionVirtualColumn expressionVirtualColumn = new ExpressionVirtualColumn(name, virtualColumnExpr, targetType, ExprMacroTable.nil());
    virtualColumns.add(expressionVirtualColumn);
    return name;
}
Also used : Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) HttpMethod(org.jboss.netty.handler.codec.http.HttpMethod) IndexSpec(org.apache.druid.segment.IndexSpec) FileStatus(org.apache.hadoop.fs.FileStatus) FunctionRegistry(org.apache.hadoop.hive.ql.exec.FunctionRegistry) StringFullResponseHandler(org.apache.druid.java.util.http.client.response.StringFullResponseHandler) HdfsDataSegmentPusherConfig(org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig) Pair(org.apache.druid.java.util.common.Pair) InetAddress(java.net.InetAddress) SqlValidatorUtil(org.apache.calcite.sql.validate.SqlValidatorUtil) JodaUtils(org.apache.druid.java.util.common.JodaUtils) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) ExprNodeEvaluatorFactory(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory) TimestampParseExprMacro(org.apache.druid.query.expression.TimestampParseExprMacro) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) InputStreamResponseHandler(org.apache.druid.java.util.http.client.response.InputStreamResponseHandler) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Set(java.util.Set) ValueType(org.apache.druid.segment.column.ValueType) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) DimFilter(org.apache.druid.query.filter.DimFilter) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) ISOChronology(org.joda.time.chrono.ISOChronology) BloomFilterSerializersModule(org.apache.druid.guice.BloomFilterSerializersModule) 
DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) Granularity(org.apache.druid.java.util.common.granularity.Granularity) HttpClient(org.apache.druid.java.util.http.client.HttpClient) UDFToLong(org.apache.hadoop.hive.ql.udf.UDFToLong) ScanQuery(org.apache.druid.query.scan.ScanQuery) TopNQuery(org.apache.druid.query.topn.TopNQuery) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) StringComparators(org.apache.druid.query.ordering.StringComparators) ResultIterator(org.skife.jdbi.v2.ResultIterator) DruidQuery(org.apache.calcite.adapter.druid.DruidQuery) Constants(org.apache.hadoop.hive.conf.Constants) Nullable(javax.annotation.Nullable) RegexpExtractExprMacro(org.apache.druid.query.expression.RegexpExtractExprMacro) HttpHeaders(org.jboss.netty.handler.codec.http.HttpHeaders) Properties(java.util.Properties) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) StringComparator(org.apache.druid.query.ordering.StringComparator) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) VirtualColumn(org.apache.druid.segment.VirtualColumn) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) UDFToDouble(org.apache.hadoop.hive.ql.udf.UDFToDouble) Throwables(com.google.common.base.Throwables) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) GenericUDFInBloomFilter(org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter) IOException(java.io.IOException) NoneShardSpec(org.apache.druid.timeline.partition.NoneShardSpec) 
UnknownHostException(java.net.UnknownHostException) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) Table(org.apache.hadoop.hive.metastore.api.Table) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) ExecutionException(java.util.concurrent.ExecutionException) HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) TimestampExtractExprMacro(org.apache.druid.query.expression.TimestampExtractExprMacro) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) RetryProxy(org.apache.hadoop.io.retry.RetryProxy) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) URL(java.net.URL) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) AvroStreamInputRowParser(org.apache.hadoop.hive.druid.json.AvroStreamInputRowParser) LoggerFactory(org.slf4j.LoggerFactory) RetryPolicies(org.apache.hadoop.io.retry.RetryPolicies) TmpFileSegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory) ByteBuffer(java.nio.ByteBuffer) UDF(org.apache.hadoop.hive.ql.exec.UDF) Druids(org.apache.druid.query.Druids) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) TimestampFormatExprMacro(org.apache.druid.query.expression.TimestampFormatExprMacro) LikeExprMacro(org.apache.druid.query.expression.LikeExprMacro) Path(org.apache.hadoop.fs.Path) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) ByteArrayMapper(org.skife.jdbi.v2.util.ByteArrayMapper) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) ImmutableSet(com.google.common.collect.ImmutableSet) MetadataStorageTablesConfig(org.apache.druid.metadata.MetadataStorageTablesConfig) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) 
AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TimestampCeilExprMacro(org.apache.druid.query.expression.TimestampCeilExprMacro) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) Interner(com.google.common.collect.Interner) BloomKFilterHolder(org.apache.druid.query.filter.BloomKFilterHolder) Collectors(java.util.stream.Collectors) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) Objects(java.util.Objects) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) MapUtils(org.apache.druid.java.util.common.MapUtils) ExprNodeDynamicValueEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeDynamicValueEvaluator) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) TimestampShiftExprMacro(org.apache.druid.query.expression.TimestampShiftExprMacro) UDFToFloat(org.apache.hadoop.hive.ql.udf.UDFToFloat) AvroParseSpec(org.apache.hadoop.hive.druid.json.AvroParseSpec) ImmutableList(com.google.common.collect.ImmutableList) NoopEmitter(org.apache.druid.java.util.emitter.core.NoopEmitter) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Request(org.apache.druid.java.util.http.client.Request) PrimitiveObjectInspectorUtils(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) BytesWritable(org.apache.hadoop.io.BytesWritable) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) IndexMergerV9(org.apache.druid.segment.IndexMergerV9) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) 
GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) OutputStream(java.io.OutputStream) SQLMetadataConnector(org.apache.druid.metadata.SQLMetadataConnector) Period(org.joda.time.Period) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VirtualColumns(org.apache.druid.segment.VirtualColumns) Logger(org.slf4j.Logger) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) TimestampFloorExprMacro(org.apache.druid.query.expression.TimestampFloorExprMacro) TypeInfoFactory(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory) Iterator(java.util.Iterator) Folder3(org.skife.jdbi.v2.Folder3) MalformedURLException(java.net.MalformedURLException) MySQLConnector(org.apache.druid.metadata.storage.mysql.MySQLConnector) TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) HiveConf(org.apache.hadoop.hive.conf.HiveConf) DateTime(org.joda.time.DateTime) Interners(com.google.common.collect.Interners) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) Query(org.skife.jdbi.v2.Query) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) TimeUnit(java.util.concurrent.TimeUnit) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory) Handle(org.skife.jdbi.v2.Handle) Ordering(com.google.common.collect.Ordering) NullHandling(org.apache.druid.common.config.NullHandling) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) BloomDimFilter(org.apache.druid.query.filter.BloomDimFilter) TrimExprMacro(org.apache.druid.query.expression.TrimExprMacro) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) FloatSumAggregatorFactory(org.apache.druid.query.aggregation.FloatSumAggregatorFactory) 
OrDimFilter(org.apache.druid.query.filter.OrDimFilter) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) InputStream(java.io.InputStream) ValueType(org.apache.druid.segment.column.ValueType) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) UDFToFloat(org.apache.hadoop.hive.ql.udf.UDFToFloat) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VirtualColumn(org.apache.druid.segment.VirtualColumn) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) Nullable(javax.annotation.Nullable)

Aggregations

ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)3 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)3 GenericUDFToString (org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString)3 ArrayList (java.util.ArrayList)2 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)2 GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)2 GenericUDFBridge (org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge)2 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)1 InjectableValues (com.fasterxml.jackson.databind.InjectableValues)1 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 NamedType (com.fasterxml.jackson.databind.jsontype.NamedType)1 SmileFactory (com.fasterxml.jackson.dataformat.smile.SmileFactory)1 Throwables (com.google.common.base.Throwables)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Interner (com.google.common.collect.Interner)1 Interners (com.google.common.collect.Interners)1 Iterables (com.google.common.collect.Iterables)1 Lists (com.google.common.collect.Lists)1