
Example 11 with IndexPredicateAnalyzer

Use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project hive by apache.

The class CompactIndexHandler, method getIndexPredicateAnalyzer:

/**
   * Instantiate a new predicate analyzer suitable for determining
   * whether we can use an index, based on rules for indexes in
   * WHERE clauses that we support
   *
   * @return preconfigured predicate analyzer for WHERE queries
   */
private IndexPredicateAnalyzer getIndexPredicateAnalyzer(Index index, Set<Partition> queryPartitions) {
    IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
    analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
    analyzer.addComparisonOp(GenericUDFOPLessThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPEqualOrLessThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPGreaterThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPEqualOrGreaterThan.class.getName());
    // only return results for columns in this index
    List<FieldSchema> columnSchemas = index.getSd().getCols();
    for (FieldSchema column : columnSchemas) {
        analyzer.allowColumnName(column.getName());
    }
    // partitioned columns are treated as if they have indexes so that the partitions
    // are used during the index query generation
    partitionCols = new HashSet<String>();
    for (Partition part : queryPartitions) {
        if (part.getSpec().isEmpty()) {
            // empty partitions are from whole tables, so we don't want to add them in
            continue;
        }
        for (String column : part.getSpec().keySet()) {
            analyzer.allowColumnName(column);
            partitionCols.add(column);
        }
    }
    return analyzer;
}
Also used: GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan), Partition (org.apache.hadoop.hive.ql.metadata.Partition), GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), GenericUDFOPEqual (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual), GenericUDFOPLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan), IndexPredicateAnalyzer (org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer), GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)
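For context, an analyzer configured this way is typically driven as in the hedged sketch below. The helper is illustrative and not part of CompactIndexHandler; the predicate argument stands in for the query's WHERE expression.

// Illustrative sketch only: split a WHERE predicate using the analyzer built by
// getIndexPredicateAnalyzer. Conditions on allowed columns that use the registered
// comparison operators are collected into searchConditions; the untranslatable
// remainder is returned as a residual that Hive must still evaluate.
private ExprNodeDesc splitForIndex(IndexPredicateAnalyzer analyzer, ExprNodeDesc predicate, List<IndexSearchCondition> searchConditions) {
    ExprNodeDesc residual = analyzer.analyzePredicate(predicate, searchConditions);
    if (searchConditions.isEmpty()) {
        // nothing is pushable to the index; the caller would fall back to a plain table scan
        return predicate;
    }
    return residual;
}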

Example 12 with IndexPredicateAnalyzer

Use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project mongo-hadoop by mongodb.

The class HiveMongoInputFormat, method getFilter:

DBObject getFilter(final JobConf conf, final Map<String, String> colToMongoNames) {
    String serializedExpr = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (serializedExpr != null) {
        ExprNodeGenericFuncDesc expr = Utilities.deserializeExpression(serializedExpr);
        IndexPredicateAnalyzer analyzer = IndexPredicateAnalyzer.createAnalyzer(false);
        // Allow all column names.
        String columnNamesStr = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
        String[] columnNames = StringUtils.split(columnNamesStr, '\\', StringUtils.COMMA);
        for (String colName : columnNames) {
            analyzer.allowColumnName(colName);
        }
        List<IndexSearchCondition> searchConditions = new LinkedList<IndexSearchCondition>();
        analyzer.analyzePredicate(expr, searchConditions);
        return getFilter(searchConditions, colToMongoNames);
    }
    return null;
}
Also used: IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), IndexPredicateAnalyzer (org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer), LinkedList (java.util.LinkedList)
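The overloaded getFilter(searchConditions, colToMongoNames) then turns each IndexSearchCondition into a MongoDB clause. The sketch below only illustrates the shape of that translation, assuming the usual accessors on IndexSearchCondition (getColumnDesc, getConstantDesc, and getComparisonOp returning the comparison UDF class name); it is not the actual mongo-hadoop implementation.

// Illustrative sketch: map one pushed-down Hive condition to a Mongo filter clause.
private DBObject toMongoClause(final IndexSearchCondition cond, final Map<String, String> colToMongoNames) {
    String mongoName = colToMongoNames.get(cond.getColumnDesc().getColumn());
    Object constant = cond.getConstantDesc().getValue();
    String op = cond.getComparisonOp();
    if (op.endsWith("GenericUDFOPEqual")) {
        return new BasicDBObject(mongoName, constant);
    } else if (op.endsWith("GenericUDFOPLessThan")) {
        return new BasicDBObject(mongoName, new BasicDBObject("$lt", constant));
    } else if (op.endsWith("GenericUDFOPGreaterThan")) {
        return new BasicDBObject(mongoName, new BasicDBObject("$gt", constant));
    }
    // operator not handled in this sketch; leave it to Hive's own filtering
    return null;
}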

Example 13 with IndexPredicateAnalyzer

Use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project hive by apache.

The class HiveHBaseTableInputFormat, method createFilterScan:

/**
 * Converts a filter (which has been pushed down from Hive's optimizer)
 * into corresponding restrictions on the HBase scan.  The
 * filter should already be in a form which can be fully converted.
 *
 * @param jobConf configuration for the scan
 *
 * @param iKey 0-based offset of key column within Hive table
 *
 * @return the Scan restricted by whatever portion of the filter could be converted
 */
private Scan createFilterScan(JobConf jobConf, int iKey, int iTimestamp, boolean isKeyBinary) throws IOException {
    // TODO: assert iKey is HBaseSerDe#HBASE_KEY_COL
    Scan scan = new Scan();
    String filterObjectSerialized = jobConf.get(TableScanDesc.FILTER_OBJECT_CONF_STR);
    if (filterObjectSerialized != null) {
        HiveHBaseInputFormatUtil.setupScanRange(scan, filterObjectSerialized, jobConf, false);
        return scan;
    }
    String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (filterExprSerialized == null) {
        return scan;
    }
    ExprNodeGenericFuncDesc filterExpr = SerializationUtilities.deserializeExpression(filterExprSerialized);
    String keyColName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iKey];
    ArrayList<TypeInfo> cols = TypeInfoUtils.getTypeInfosFromTypeString(jobConf.get(serdeConstants.LIST_COLUMN_TYPES));
    String colType = cols.get(iKey).getTypeName();
    boolean isKeyComparable = isKeyBinary || colType.equalsIgnoreCase("string");
    String tsColName = null;
    if (iTimestamp >= 0) {
        tsColName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iTimestamp];
    }
    IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(keyColName, isKeyComparable, tsColName);
    List<IndexSearchCondition> conditions = new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, conditions);
    // THIS IGNORES RESIDUAL PARSING FROM HBaseStorageHandler#decomposePredicate
    if (residualPredicate != null) {
        LOG.debug("Ignoring residual predicate " + residualPredicate.getExprString());
    }
    Map<String, List<IndexSearchCondition>> split = HiveHBaseInputFormatUtil.decompose(conditions);
    List<IndexSearchCondition> keyConditions = split.get(keyColName);
    if (keyConditions != null && !keyConditions.isEmpty()) {
        HiveHBaseInputFormatUtil.setupKeyRange(scan, keyConditions, isKeyBinary);
    }
    List<IndexSearchCondition> tsConditions = split.get(tsColName);
    if (tsConditions != null && !tsConditions.isEmpty()) {
        HiveHBaseInputFormatUtil.setupTimeRange(scan, tsConditions);
    }
    return scan;
}
Also used: IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition), ArrayList (java.util.ArrayList), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), Scan (org.apache.hadoop.hbase.client.Scan), List (java.util.List), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), IndexPredicateAnalyzer (org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer)
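For orientation, newIndexPredicateAnalyzer (not shown above) is expected to return an analyzer restricted to the row key column and, when present, the timestamp column. The following is a hedged sketch of that configuration, not the exact Hive implementation; the real method differs in which operators it registers and how.

// Sketch: allow pushdown only on the key and timestamp columns. Range operators are
// registered only when the key bytes sort the same way as the Hive value (binary-encoded
// or string keys), mirroring the isKeyComparable check in createFilterScan above.
private IndexPredicateAnalyzer newIndexPredicateAnalyzerSketch(String keyColName, boolean isKeyComparable, String tsColName) {
    IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
    analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
    if (isKeyComparable) {
        analyzer.addComparisonOp(GenericUDFOPLessThan.class.getName());
        analyzer.addComparisonOp(GenericUDFOPEqualOrLessThan.class.getName());
        analyzer.addComparisonOp(GenericUDFOPGreaterThan.class.getName());
        analyzer.addComparisonOp(GenericUDFOPEqualOrGreaterThan.class.getName());
    }
    analyzer.allowColumnName(keyColName);
    if (tsColName != null) {
        analyzer.allowColumnName(tsColName);
    }
    return analyzer;
}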

Example 14 with IndexPredicateAnalyzer

Use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project hive by apache.

The class AccumuloPredicateHandler, method decompose:

/**
 * @param conf
 *          Configuration
 * @param desc
 *          predicate expression node.
 * @return DecomposedPredicate containing translated search conditions the analyzer can support.
 */
public DecomposedPredicate decompose(Configuration conf, ExprNodeDesc desc) {
    IndexPredicateAnalyzer analyzer = newAnalyzer(conf);
    List<IndexSearchCondition> sConditions = new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residualPredicate = analyzer.analyzePredicate(desc, sConditions);
    if (sConditions.size() == 0) {
        LOG.info("nothing to decompose. Returning");
        return null;
    }
    DecomposedPredicate decomposedPredicate = new DecomposedPredicate();
    decomposedPredicate.pushedPredicate = analyzer.translateSearchConditions(sConditions);
    decomposedPredicate.residualPredicate = (ExprNodeGenericFuncDesc) residualPredicate;
    return decomposedPredicate;
}
Also used: DecomposedPredicate (org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate), IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition), ArrayList (java.util.ArrayList), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), IndexPredicateAnalyzer (org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer)
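To show how the two halves of the returned DecomposedPredicate are meant to be consumed, here is an illustrative fragment. The names predicateHandler and whereExpr are hypothetical; the split itself follows the HiveStoragePredicateHandler contract.

// Illustrative fragment: the pushed predicate is handed to the storage layer (typically
// serialized into the job configuration for the input format), while the residual stays
// with Hive's own FilterOperator.
DecomposedPredicate dp = predicateHandler.decompose(conf, whereExpr);
if (dp != null) {
    ExprNodeGenericFuncDesc pushed = dp.pushedPredicate;       // evaluated by the Accumulo scan
    ExprNodeGenericFuncDesc residual = dp.residualPredicate;   // still evaluated by Hive
}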

Aggregations

IndexPredicateAnalyzer (org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer): 14
IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition): 11
ArrayList (java.util.ArrayList): 7
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 5
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 5
DecomposedPredicate (org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate): 3
LinkedList (java.util.LinkedList): 2
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 2
Index (org.apache.hadoop.hive.metastore.api.Index): 2
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 2
GenericUDFOPEqual (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual): 2
GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan): 2
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan): 2
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan): 2
GenericUDFOPLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan): 2
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 2
IOException (java.io.IOException): 1
HashMap (java.util.HashMap): 1
LinkedHashMap (java.util.LinkedHashMap): 1
List (java.util.List): 1