Examples with IndexPredicateAnalyzer - org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer

Example 1 with IndexPredicateAnalyzer

use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project hive by apache.

the class BitmapIndexHandler method getIndexPredicateAnalyzer.

/**
 * Builds a predicate analyzer configured for deciding whether an index can
 * be used for a WHERE clause.
 *
 * <p>The analyzer accepts the comparison operators {@code =}, {@code <},
 * {@code <=}, {@code >} and {@code >=}, and only the column names that come
 * from the supplied indexes or from the partition specs of the queried
 * partitions.
 *
 * @param indexes indexes whose storage-descriptor columns are allowed
 * @param queryPartitions partitions whose spec keys are allowed as columns
 * @return preconfigured predicate analyzer for WHERE queries
 */
private IndexPredicateAnalyzer getIndexPredicateAnalyzer(List<Index> indexes, Set<Partition> queryPartitions) {
    final IndexPredicateAnalyzer predicateAnalyzer = new IndexPredicateAnalyzer();

    // Register the supported comparison operators, in a fixed order.
    final String[] supportedOps = {
        GenericUDFOPEqual.class.getName(),
        GenericUDFOPLessThan.class.getName(),
        GenericUDFOPEqualOrLessThan.class.getName(),
        GenericUDFOPGreaterThan.class.getName(),
        GenericUDFOPEqualOrGreaterThan.class.getName()
    };
    for (String opClassName : supportedOps) {
        predicateAnalyzer.addComparisonOp(opClassName);
    }

    // Only return results for columns that appear in one of the indexes.
    for (Index idx : indexes) {
        for (FieldSchema field : idx.getSd().getCols()) {
            predicateAnalyzer.allowColumnName(field.getName());
        }
    }

    // Partition columns are allowed as well, so that the partitions
    // are used during the index query generation.
    for (Partition partition : queryPartitions) {
        // Empty partition specs are from whole tables, so there is nothing to add for them.
        if (!partition.getSpec().isEmpty()) {
            for (String partitionColumn : partition.getSpec().keySet()) {
                predicateAnalyzer.allowColumnName(partitionColumn);
            }
        }
    }
    return predicateAnalyzer;
}

Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) Partition(org.apache.hadoop.hive.ql.metadata.Partition) GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) GenericUDFOPEqual(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual) Index(org.apache.hadoop.hive.metastore.api.Index) GenericUDFOPLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan) IndexPredicateAnalyzer(org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)

Example 2 with IndexPredicateAnalyzer

use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project cdap by caskdata.

the class HiveStreamInputFormat method setupBuilder.

/**
 * Sets up the given {@link StreamInputSplitFinder.Builder} by analyzing the query.
 *
 * <p>The default time range is {@code [max(0, now - TTL), now)}. When the
 * configuration carries a serialized filter expression, comparisons of the
 * {@code ts} column against {@code Long} constants are used to narrow that
 * range. Any failure while analyzing the expression is logged and the range
 * computed so far is used.
 *
 * @param conf configuration that may hold the serialized filter expression
 * @param streamConfig stream configuration supplying the TTL
 * @param builder builder to receive the start and end time
 * @return the builder returned by the {@code setStartTime(...).setEndTime(...)} chain
 */
private StreamInputSplitFinder.Builder setupBuilder(Configuration conf, StreamConfig streamConfig, StreamInputSplitFinder.Builder builder) {
    long startTime = Math.max(0L, System.currentTimeMillis() - streamConfig.getTTL());
    long endTime = System.currentTimeMillis();

    // The conf contains a 'hive.io.filter.expr.serialized' key which contains the serialized form of ExprNodeDesc
    final String serializedFilter = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (serializedFilter != null) {
        try {
            final ExprNodeGenericFuncDesc filterExpr = HiveUtilities.deserializeExpression(serializedFilter, conf);

            // Extract the predicates usable for indexing, i.e. for setting start/end time.
            final IndexPredicateAnalyzer predicateAnalyzer = new IndexPredicateAnalyzer();
            for (CompareOp supported : CompareOp.values()) {
                predicateAnalyzer.addComparisonOp(supported.getOpClassName());
            }
            // Stream can only be indexed by timestamp
            predicateAnalyzer.clearAllowedColumnNames();
            predicateAnalyzer.allowColumnName("ts");

            final List<IndexSearchCondition> found = Lists.newArrayList();
            predicateAnalyzer.analyzePredicate(filterExpr, found);

            for (IndexSearchCondition cond : found) {
                final CompareOp op = CompareOp.from(cond.getComparisonOp());
                if (op == null) {
                    // Not a supported operation
                    continue;
                }
                final ExprNodeConstantDesc constant = cond.getConstantDesc();
                final Object rawValue = (constant == null) ? null : constant.getValue();
                if (!(rawValue instanceof Long)) {
                    // Not a supported value
                    continue;
                }
                final long ts = (Long) rawValue;
                // ts + 1, except at Long.MAX_VALUE where adding one would overflow.
                final long tsPlusOne = (ts < Long.MAX_VALUE) ? ts + 1L : ts;

                if (op == CompareOp.EQUAL) {
                    // An equality pins both bounds; no need to inspect further conditions.
                    startTime = ts;
                    endTime = tsPlusOne;
                    break;
                }
                if (op == CompareOp.GREATER) {
                    // Start time is inclusive in stream, so strict "greater" begins one past ts.
                    startTime = Math.max(startTime, tsPlusOne);
                } else if (op == CompareOp.EQUAL_OR_GREATER) {
                    startTime = Math.max(startTime, ts);
                } else if (op == CompareOp.EQUAL_OR_LESS) {
                    // End time is exclusive in stream, so "equal or less" ends one past ts.
                    endTime = Math.min(endTime, tsPlusOne);
                } else {
                    endTime = Math.min(endTime, ts);
                }
            }
        } catch (Throwable t) {
            LOG.warn("Exception analyzing query predicate. A full table scan will be performed.", t);
        }
    }
    return builder.setStartTime(startTime).setEndTime(endTime);
}

Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) IndexSearchCondition(org.apache.hadoop.hive.ql.index.IndexSearchCondition) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) IndexPredicateAnalyzer(org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer)

Example 3 with IndexPredicateAnalyzer

use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project mongo-hadoop by mongodb.

the class MongoStorageHandler method decomposePredicate.

/**
 * Splits {@code predicate} into a part built from the search conditions the
 * analyzer extracted and a residual part the analyzer returned.
 *
 * @param jobConf job configuration (not read by this method)
 * @param deserializer must be a {@code BSONSerDe}; its column names are the allowed columns
 * @param predicate the predicate to decompose
 * @return the decomposed predicate holding the pushed and residual expressions
 */
@Override
public DecomposedPredicate decomposePredicate(final JobConf jobConf, final Deserializer deserializer, final ExprNodeDesc predicate) {
    final BSONSerDe bsonSerDe = (BSONSerDe) deserializer;

    // Analyzer for equality and general binary comparisons
    // (false = "more than just equality").
    // TODO: The analyzer is only capable of handling binary comparison
    // expressions, but we could push down more than that in the future by
    // writing our own analyzer.
    final IndexPredicateAnalyzer predicateAnalyzer = IndexPredicateAnalyzer.createAnalyzer(false);

    // Predicate may contain any column.
    for (String columnName : bsonSerDe.columnNames) {
        predicateAnalyzer.allowColumnName(columnName);
    }

    final List<IndexSearchCondition> pushable = new LinkedList<IndexSearchCondition>();
    final ExprNodeDesc leftover = predicateAnalyzer.analyzePredicate(predicate, pushable);

    final DecomposedPredicate result = new DecomposedPredicate();
    result.pushedPredicate = predicateAnalyzer.translateSearchConditions(pushable);
    result.residualPredicate = (ExprNodeGenericFuncDesc) leftover;
    return result;
}

Also used : IndexSearchCondition(org.apache.hadoop.hive.ql.index.IndexSearchCondition) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) IndexPredicateAnalyzer(org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer) LinkedList(java.util.LinkedList)

Example 4 with IndexPredicateAnalyzer

use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project hive by apache.

the class AccumuloPredicateHandler method getSearchConditions.

/**
 * Extracts the index search conditions from the filter expression found in
 * the configuration.
 *
 * @param conf
 *          Configuration
 * @return list of IndexSearchConditions from the filter expression; empty
 *         when there is no filter expression.
 * @throws RuntimeException
 *           if the analyzer leaves a residual predicate
 */
public List<IndexSearchCondition> getSearchConditions(Configuration conf) {
    final List<IndexSearchCondition> conditions = Lists.newArrayList();
    final ExprNodeDesc filter = getExpression(conf);
    if (filter != null) {
        final ExprNodeDesc leftover = newAnalyzer(conf).analyzePredicate(filter, conditions);
        if (leftover != null) {
            // Every part of the filter is expected to become a search condition.
            throw new RuntimeException("Unexpected residual predicate: " + leftover.getExprString());
        }
    }
    return conditions;
}

Example 5 with IndexPredicateAnalyzer

use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project hive by apache.

the class AccumuloPredicateHandler method newAnalyzer.

/**
 * Build an analyzer that allows comparison ops from the compareOps key set, and all columns
 * listed under {@code serdeConstants.LIST_COLUMNS} in the configuration.
 *
 * <p>If the configuration does not define the column list, no column names are allowed
 * rather than failing with a {@link NullPointerException}.
 *
 * @param conf
 *          Configuration holding the table's column list
 * @return analyzer restricted to the known comparison ops and the configured columns
 */
private IndexPredicateAnalyzer newAnalyzer(Configuration conf) {
    IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
    analyzer.clearAllowedColumnNames();
    for (String op : cOpKeyset()) {
        analyzer.addComparisonOp(op);
    }
    // Configuration.getStrings returns null (not an empty array) when the property is unset.
    String[] hiveColumnNames = conf.getStrings(serdeConstants.LIST_COLUMNS);
    if (hiveColumnNames != null) {
        for (String col : hiveColumnNames) {
            analyzer.allowColumnName(col);
        }
    }
    return analyzer;
}

Also used : IndexPredicateAnalyzer(org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer)

Aggregations

IndexPredicateAnalyzer (org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer)16 IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition)12 ArrayList (java.util.ArrayList)8 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)6 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)5 DecomposedPredicate (org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate)4 GenericUDFOPEqual (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual)3 GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)3 GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan)3 GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan)3 GenericUDFOPLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan)3 LinkedList (java.util.LinkedList)2 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)2 Index (org.apache.hadoop.hive.metastore.api.Index)2 Partition (org.apache.hadoop.hive.ql.metadata.Partition)2 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)2 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1