Use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project hive by apache.
The class CompactIndexHandler, method getIndexPredicateAnalyzer.
/**
 * Instantiate a new predicate analyzer suitable for determining
 * whether we can use an index, based on rules for indexes in
 * WHERE clauses that we support.
 *
 * @return preconfigured predicate analyzer for WHERE queries
 */
private IndexPredicateAnalyzer getIndexPredicateAnalyzer(Index index, Set<Partition> queryPartitions) {
  IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
  analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
  analyzer.addComparisonOp(GenericUDFOPLessThan.class.getName());
  analyzer.addComparisonOp(GenericUDFOPEqualOrLessThan.class.getName());
  analyzer.addComparisonOp(GenericUDFOPGreaterThan.class.getName());
  analyzer.addComparisonOp(GenericUDFOPEqualOrGreaterThan.class.getName());

  // only return results for columns in this index
  List<FieldSchema> columnSchemas = index.getSd().getCols();
  for (FieldSchema column : columnSchemas) {
    analyzer.allowColumnName(column.getName());
  }

  // partitioned columns are treated as if they have indexes so that the partitions
  // are used during the index query generation
  partitionCols = new HashSet<String>();
  for (Partition part : queryPartitions) {
    if (part.getSpec().isEmpty()) {
      // empty partitions are from whole tables, so we don't want to add them in
      continue;
    }
    for (String column : part.getSpec().keySet()) {
      analyzer.allowColumnName(column);
      partitionCols.add(column);
    }
  }
  return analyzer;
}
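For context (this caller is not part of the snippet above): a minimal sketch of how the analyzer returned by getIndexPredicateAnalyzer is typically consumed, assuming a variable named predicate holds the deserialized WHERE-clause expression; the variable names here are illustrative.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;

// Hypothetical caller: `analyzer` comes from getIndexPredicateAnalyzer(...)
// and `predicate` is the deserialized WHERE-clause expression.
List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
ExprNodeDesc residual = analyzer.analyzePredicate(predicate, searchConditions);

// searchConditions now holds the comparisons the index can serve;
// `residual` (possibly null) is the remainder Hive must still evaluate
// after the index lookup.
ExprNodeGenericFuncDesc pushed = analyzer.translateSearchConditions(searchConditions);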
Use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project mongo-hadoop by mongodb.
The class HiveMongoInputFormat, method getFilter.
DBObject getFilter(final JobConf conf, final Map<String, String> colToMongoNames) {
  String serializedExpr = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (serializedExpr != null) {
    ExprNodeGenericFuncDesc expr = Utilities.deserializeExpression(serializedExpr);
    IndexPredicateAnalyzer analyzer = IndexPredicateAnalyzer.createAnalyzer(false);

    // Allow all column names.
    String columnNamesStr = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
    String[] columnNames = StringUtils.split(columnNamesStr, '\\', StringUtils.COMMA);
    for (String colName : columnNames) {
      analyzer.allowColumnName(colName);
    }

    List<IndexSearchCondition> searchConditions = new LinkedList<IndexSearchCondition>();
    analyzer.analyzePredicate(expr, searchConditions);
    return getFilter(searchConditions, colToMongoNames);
  }
  return null;
}
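The overloaded getFilter(searchConditions, colToMongoNames) that performs the actual translation is not shown above. A simplified, hypothetical sketch of what such a translation could look like, using only the public IndexSearchCondition accessors; mongo-hadoop's real implementation handles more operators and nested field names:

import java.util.List;
import java.util.Map;

import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;

// Hypothetical translation of search conditions into a Mongo query document.
DBObject toMongoFilter(final List<IndexSearchCondition> conditions,
                       final Map<String, String> colToMongoNames) {
  BasicDBObject query = new BasicDBObject();
  for (IndexSearchCondition cond : conditions) {
    String hiveCol = cond.getColumnDesc().getColumn();
    String mongoName = colToMongoNames.containsKey(hiveCol)
        ? colToMongoNames.get(hiveCol) : hiveCol;
    Object value = cond.getConstantDesc().getValue();
    String op = cond.getComparisonOp();
    if (GenericUDFOPEqual.class.getName().equals(op)) {
      query.put(mongoName, value); // {field: value}
    } else if (GenericUDFOPLessThan.class.getName().equals(op)) {
      query.put(mongoName, new BasicDBObject("$lt", value));
    }
    // ... remaining comparison ops would map to $lte, $gt, $gte, etc.
  }
  return query;
}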
Use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project hive by apache.
The class HiveHBaseTableInputFormat, method createFilterScan.
/**
 * Converts a filter (which has been pushed down from Hive's optimizer)
 * into corresponding restrictions on the HBase scan. The
 * filter should already be in a form which can be fully converted.
 *
 * @param jobConf configuration for the scan
 * @param iKey 0-based offset of the key column within the Hive table
 * @param iTimestamp 0-based offset of the timestamp column, or negative if there is none
 * @param isKeyBinary whether the key is stored in HBase's binary format
 *
 * @return the Scan, with the converted filter restrictions applied if any
 */
private Scan createFilterScan(JobConf jobConf, int iKey, int iTimestamp, boolean isKeyBinary)
    throws IOException {
  // TODO: assert iKey is HBaseSerDe#HBASE_KEY_COL
  Scan scan = new Scan();

  String filterObjectSerialized = jobConf.get(TableScanDesc.FILTER_OBJECT_CONF_STR);
  if (filterObjectSerialized != null) {
    HiveHBaseInputFormatUtil.setupScanRange(scan, filterObjectSerialized, jobConf, false);
    return scan;
  }

  String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (filterExprSerialized == null) {
    return scan;
  }
  ExprNodeGenericFuncDesc filterExpr = SerializationUtilities.deserializeExpression(filterExprSerialized);

  String keyColName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iKey];
  ArrayList<TypeInfo> cols = TypeInfoUtils.getTypeInfosFromTypeString(jobConf.get(serdeConstants.LIST_COLUMN_TYPES));
  String colType = cols.get(iKey).getTypeName();
  boolean isKeyComparable = isKeyBinary || colType.equalsIgnoreCase("string");

  String tsColName = null;
  if (iTimestamp >= 0) {
    tsColName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iTimestamp];
  }

  IndexPredicateAnalyzer analyzer = newIndexPredicateAnalyzer(keyColName, isKeyComparable, tsColName);
  List<IndexSearchCondition> conditions = new ArrayList<IndexSearchCondition>();
  ExprNodeDesc residualPredicate = analyzer.analyzePredicate(filterExpr, conditions);

  // THIS IGNORES RESIDUAL PARSING FROM HBaseStorageHandler#decomposePredicate
  if (residualPredicate != null) {
    LOG.debug("Ignoring residual predicate " + residualPredicate.getExprString());
  }

  Map<String, List<IndexSearchCondition>> split = HiveHBaseInputFormatUtil.decompose(conditions);
  List<IndexSearchCondition> keyConditions = split.get(keyColName);
  if (keyConditions != null && !keyConditions.isEmpty()) {
    HiveHBaseInputFormatUtil.setupKeyRange(scan, keyConditions, isKeyBinary);
  }
  List<IndexSearchCondition> tsConditions = split.get(tsColName);
  if (tsConditions != null && !tsConditions.isEmpty()) {
    HiveHBaseInputFormatUtil.setupTimeRange(scan, tsConditions);
  }
  return scan;
}
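The helper newIndexPredicateAnalyzer is private to the same class and not shown above. A plausible reconstruction, built only from the analyzer methods seen elsewhere on this page (the real Hive implementation may differ, e.g. by registering operators per column): equality is always safe to push down, but range comparisons are only meaningful when the key's HBase byte order matches its comparison order, i.e. binary storage or string keys.

import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;

// Hypothetical reconstruction, not Hive's actual source.
static IndexPredicateAnalyzer newIndexPredicateAnalyzer(
    String keyColName, boolean isKeyComparable, String tsColName) {
  IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
  // Equality can always be pushed down.
  analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
  if (isKeyComparable) {
    // Range scans only make sense when byte order matches value order.
    analyzer.addComparisonOp(GenericUDFOPLessThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPEqualOrLessThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPGreaterThan.class.getName());
    analyzer.addComparisonOp(GenericUDFOPEqualOrGreaterThan.class.getName());
  }
  analyzer.allowColumnName(keyColName);
  if (tsColName != null) {
    analyzer.allowColumnName(tsColName);
  }
  return analyzer;
}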
Use of org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer in project hive by apache.
The class AccumuloPredicateHandler, method decompose.
/**
 * @param conf Configuration
 * @param desc predicate expression node
 * @return DecomposedPredicate containing the translated search conditions the analyzer can support
 */
public DecomposedPredicate decompose(Configuration conf, ExprNodeDesc desc) {
  IndexPredicateAnalyzer analyzer = newAnalyzer(conf);
  List<IndexSearchCondition> sConditions = new ArrayList<IndexSearchCondition>();
  ExprNodeDesc residualPredicate = analyzer.analyzePredicate(desc, sConditions);

  if (sConditions.size() == 0) {
    LOG.info("nothing to decompose. Returning");
    return null;
  }

  DecomposedPredicate decomposedPredicate = new DecomposedPredicate();
  decomposedPredicate.pushedPredicate = analyzer.translateSearchConditions(sConditions);
  decomposedPredicate.residualPredicate = (ExprNodeGenericFuncDesc) residualPredicate;
  return decomposedPredicate;
}
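A hedged sketch of how a caller, such as a storage handler implementing HiveStoragePredicateHandler#decomposePredicate, might consume this result; the names predicateHandler, conf, and predicate are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

// Illustrative caller; variable names are assumptions, not Hive source.
DecomposedPredicate decomposed = predicateHandler.decompose(conf, predicate);
if (decomposed == null) {
  // Nothing pushable: Hive evaluates the whole predicate itself.
} else {
  // decomposed.pushedPredicate is handed to the Accumulo scan, while
  // decomposed.residualPredicate (possibly null) stays with Hive for
  // post-scan evaluation.
}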