Example 1 with IndexSearchCondition

Use of org.apache.hadoop.hive.ql.index.IndexSearchCondition in project hive by apache.

From the class HiveHBaseTableInputFormat, method setupTimeRange:

private void setupTimeRange(Scan scan, List<IndexSearchCondition> conditions) throws IOException {
    // HBase scan time ranges are half-open: start is inclusive, end is exclusive.
    long start = 0;
    long end = Long.MAX_VALUE;
    for (IndexSearchCondition sc : conditions) {
        long timestamp = getTimestampVal(sc);
        String comparisonOp = sc.getComparisonOp();
        if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual".equals(comparisonOp)) {
            // ts = t -> [t, t + 1)
            start = timestamp;
            end = timestamp + 1;
        } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan".equals(comparisonOp)) {
            // ts < t -> the exclusive end is t itself
            end = timestamp;
        } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan".equals(comparisonOp)) {
            // ts >= t -> the inclusive start is t itself
            start = timestamp;
        } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan".equals(comparisonOp)) {
            // ts > t -> move the inclusive start past t
            start = timestamp + 1;
        } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan".equals(comparisonOp)) {
            // ts <= t -> move the exclusive end past t
            end = timestamp + 1;
        } else {
            throw new IOException(comparisonOp + " is not a supported comparison operator");
        }
    }
    scan.setTimeRange(start, end);
}
Also used: IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition), IOException (java.io.IOException)
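The mapping above relies on HBase's half-open time-range semantics: Scan.setTimeRange(start, end) includes start and excludes end, so an equality predicate on the timestamp becomes the one-element range [t, t + 1). A minimal, self-contained sketch (the class name is hypothetical, not part of Hive):

import java.io.IOException;

import org.apache.hadoop.hbase.client.Scan;

public class TimeRangeSketch {

    public static void main(String[] args) throws IOException {
        long t = 1500000000000L;
        Scan scan = new Scan();
        // "ts = t" becomes the half-open range [t, t + 1), which matches
        // exactly the cell versions stamped with t.
        scan.setTimeRange(t, t + 1);
        System.out.println(scan.getTimeRange());
    }
}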

Example 2 with IndexSearchCondition

Use of org.apache.hadoop.hive.ql.index.IndexSearchCondition in project cdap by caskdata.

From the class HiveStreamInputFormat, method setupBuilder:

/**
 * Sets up the given {@link StreamInputSplitFinder.Builder} by analyzing the query.
 */
private StreamInputSplitFinder.Builder setupBuilder(Configuration conf, StreamConfig streamConfig, StreamInputSplitFinder.Builder builder) {
    // The conf carries a 'hive.io.filter.expr.serialized' key holding the serialized form of the filter ExprNodeDesc
    long startTime = Math.max(0L, System.currentTimeMillis() - streamConfig.getTTL());
    long endTime = System.currentTimeMillis();
    String serializedExpr = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (serializedExpr == null) {
        return builder.setStartTime(startTime).setEndTime(endTime);
    }
    try {
        ExprNodeGenericFuncDesc expr = HiveUtilities.deserializeExpression(serializedExpr, conf);
        // Analyze the query to extract predicates that can be used for indexing (i.e. setting start/end time)
        IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
        for (CompareOp op : CompareOp.values()) {
            analyzer.addComparisonOp(op.getOpClassName());
        }
        // Stream can only be indexed by timestamp
        analyzer.clearAllowedColumnNames();
        analyzer.allowColumnName("ts");
        List<IndexSearchCondition> conditions = Lists.newArrayList();
        analyzer.analyzePredicate(expr, conditions);
        for (IndexSearchCondition condition : conditions) {
            CompareOp op = CompareOp.from(condition.getComparisonOp());
            if (op == null) {
                // Not a supported operation
                continue;
            }
            ExprNodeConstantDesc value = condition.getConstantDesc();
            if (value == null || !(value.getValue() instanceof Long)) {
                // Not a supported value
                continue;
            }
            long timestamp = (Long) value.getValue();
            // If there is an equality predicate, set both start and end time; no need to inspect further
            if (op == CompareOp.EQUAL) {
                startTime = timestamp;
                endTime = (timestamp < Long.MAX_VALUE) ? timestamp + 1L : timestamp;
                break;
            }
            if (op == CompareOp.GREATER || op == CompareOp.EQUAL_OR_GREATER) {
                // Add 1 to the start time for a strict greater-than, since the stream's start time is inclusive
                startTime = Math.max(startTime, timestamp + (timestamp < Long.MAX_VALUE && op == CompareOp.GREATER ? 1L : 0L));
            } else {
                // Add 1 to the end time for equal-or-less, since the stream's end time is exclusive
                endTime = Math.min(endTime, timestamp + (timestamp < Long.MAX_VALUE && op == CompareOp.EQUAL_OR_LESS ? 1L : 0L));
            }
        }
    } catch (Throwable t) {
        LOG.warn("Exception analyzing query predicate. A full table scan will be performed.", t);
    }
    return builder.setStartTime(startTime).setEndTime(endTime);
}
Also used: ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), IndexPredicateAnalyzer (org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer)
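The loop above only ever tightens the initial [startTime, endTime) window derived from the stream TTL: lower bounds combine via Math.max, upper bounds via Math.min, and the +1 adjustments account for the inclusive start and exclusive end. A stripped-down sketch of that arithmetic with the CDAP types removed (all names here are illustrative):

public class BoundTighteningSketch {

    public static void main(String[] args) {
        long start = 0L;
        long end = Long.MAX_VALUE;
        // ts > 100: the inclusive start must move past 100
        start = Math.max(start, 100L + 1L);
        // ts >= 150: the inclusive start may sit on 150 itself
        start = Math.max(start, 150L);
        // ts <= 200: the exclusive end must move past 200
        end = Math.min(end, 200L + 1L);
        // prints [150, 201)
        System.out.println("[" + start + ", " + end + ")");
    }
}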

Example 3 with IndexSearchCondition

Use of org.apache.hadoop.hive.ql.index.IndexSearchCondition in project mongo-hadoop by mongodb.

From the class MongoStorageHandler, method decomposePredicate:

@Override
public DecomposedPredicate decomposePredicate(final JobConf jobConf, final Deserializer deserializer, final ExprNodeDesc predicate) {
    BSONSerDe serde = (BSONSerDe) deserializer;
    // Create a new analyzer capable of handling equality and general
    // binary comparisons ("false" here means the analyzer is not
    // restricted to equality alone).
    // TODO: The analyzer is only capable of handling binary comparison
    // expressions, but we could push down more than that in the future by
    // writing our own analyzer.
    IndexPredicateAnalyzer analyzer = IndexPredicateAnalyzer.createAnalyzer(false);
    // Predicate may contain any column.
    for (String colName : serde.columnNames) {
        analyzer.allowColumnName(colName);
    }
    List<IndexSearchCondition> searchConditions = new LinkedList<IndexSearchCondition>();
    ExprNodeDesc residual = analyzer.analyzePredicate(predicate, searchConditions);
    DecomposedPredicate decomposed = new DecomposedPredicate();
    decomposed.pushedPredicate = analyzer.translateSearchConditions(searchConditions);
    decomposed.residualPredicate = (ExprNodeGenericFuncDesc) residual;
    return decomposed;
}
Also used: IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), IndexPredicateAnalyzer (org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer), LinkedList (java.util.LinkedList)
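Conceptually, decomposePredicate splits a conjunctive filter into a pushed part the storage handler evaluates and a residual part Hive evaluates afterwards. A simplified, hypothetical illustration of that split over plain data (none of these names come from the MongoDB handler):

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

public class PredicateSplitSketch {

    static final class Conjunct {
        final String column;
        final String expr;

        Conjunct(String column, String expr) {
            this.column = column;
            this.expr = expr;
        }

        @Override
        public String toString() {
            return expr;
        }
    }

    public static void main(String[] args) {
        // Columns the (imaginary) storage handler can filter on its own.
        Set<String> pushable = Set.of("_id", "age");
        List<Conjunct> pushed = new ArrayList<>();
        List<Conjunct> residual = new ArrayList<>();
        for (Conjunct c : List.of(new Conjunct("age", "age > 21"),
                new Conjunct("name", "upper(name) = 'BOB'"))) {
            // Conjuncts over pushable columns go to the storage handler;
            // everything else stays behind as the residual predicate.
            (pushable.contains(c.column) ? pushed : residual).add(c);
        }
        System.out.println("pushed=" + pushed + ", residual=" + residual);
    }
}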

Example 4 with IndexSearchCondition

Use of org.apache.hadoop.hive.ql.index.IndexSearchCondition in project hive by apache.

From the class AccumuloPredicateHandler, method getSearchConditions:

/**
 * @param conf
 *          Configuration
 * @return list of IndexSearchConditions from the filter expression.
 */
public List<IndexSearchCondition> getSearchConditions(Configuration conf) {
    final List<IndexSearchCondition> sConditions = Lists.newArrayList();
    ExprNodeDesc filterExpr = getExpression(conf);
    if (null == filterExpr) {
        return sConditions;
    }
    IndexPredicateAnalyzer analyzer = newAnalyzer(conf);
    ExprNodeDesc residual = analyzer.analyzePredicate(filterExpr, sConditions);
    if (residual != null) {
        throw new RuntimeException("Unexpected residual predicate: " + residual.getExprString());
    }
    return sConditions;
}
Also used: IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), IndexPredicateAnalyzer (org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer)
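Note the contrast with Example 3: the MongoDB handler hands any residual predicate back to Hive, while this method treats a residual as an error, presumably because newAnalyzer(conf) only admits conditions the Accumulo handler can evaluate in full.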

Example 5 with IndexSearchCondition

Use of org.apache.hadoop.hive.ql.index.IndexSearchCondition in project hive by apache.

From the class AccumuloPredicateHandler, method getIterators:

/**
 * Loop through search conditions and build iterator settings for predicates involving columns
 * other than rowID, if any.
 *
 * @param conf
 *          Configuration
 * @throws SerDeException
 */
public List<IteratorSetting> getIterators(Configuration conf, ColumnMapper columnMapper) throws SerDeException {
    List<IteratorSetting> itrs = Lists.newArrayList();
    boolean shouldPushdown = conf.getBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, AccumuloSerDeParameters.ITERATOR_PUSHDOWN_DEFAULT);
    if (!shouldPushdown) {
        LOG.info("Iterator pushdown is disabled for this table");
        return itrs;
    }
    boolean binaryEncodedRow = ColumnEncoding.BINARY.getName().equalsIgnoreCase(conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE));
    int rowIdOffset = columnMapper.getRowIdOffset();
    String[] hiveColumnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS);
    if (null == hiveColumnNamesArr) {
        throw new IllegalArgumentException("Could not find Hive columns in configuration");
    }
    String hiveRowIdColumnName = null;
    if (rowIdOffset >= 0 && rowIdOffset < hiveColumnNamesArr.length) {
        hiveRowIdColumnName = hiveColumnNamesArr[rowIdOffset];
    }
    List<String> hiveColumnNames = Arrays.asList(hiveColumnNamesArr);
    for (IndexSearchCondition sc : getSearchConditions(conf)) {
        String col = sc.getColumnDesc().getColumn();
        if (hiveRowIdColumnName == null || !hiveRowIdColumnName.equals(col)) {
            HiveAccumuloColumnMapping mapping = (HiveAccumuloColumnMapping) columnMapper.getColumnMappingForHiveColumn(hiveColumnNames, col);
            itrs.add(toSetting(mapping, sc, binaryEncodedRow));
        }
    }
    LOG.info("num iterators = " + itrs.size());
    return itrs;
}
Also used: IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition), HiveAccumuloColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping)
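The pushdown guard at the top of getIterators is a plain Hadoop Configuration lookup. A minimal sketch of that guard (the key name and default below are illustrative stand-ins for AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY and ITERATOR_PUSHDOWN_DEFAULT):

import org.apache.hadoop.conf.Configuration;

public class PushdownGuardSketch {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical key; the real constant lives in AccumuloSerDeParameters.
        conf.setBoolean("accumulo.iterator.pushdown", false);
        boolean shouldPushdown = conf.getBoolean("accumulo.iterator.pushdown", true);
        if (!shouldPushdown) {
            System.out.println("Iterator pushdown is disabled for this table");
        }
    }
}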

Aggregations

IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition): 25
IndexPredicateAnalyzer (org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer): 12
ArrayList (java.util.ArrayList): 11
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 10
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 9
IOException (java.io.IOException): 7
HashMap (java.util.HashMap): 4
List (java.util.List): 4
DecomposedPredicate (org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate): 4
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 4
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 4
Test (org.junit.Test): 4
GenericUDFOPEqual (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual): 3
LinkedList (java.util.LinkedList): 2
FilterList (org.apache.hadoop.hbase.filter.FilterList): 2
ExprNodeConstantEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeConstantEvaluator): 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 2
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 2
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 2