use of org.apache.hadoop.hive.ql.index.IndexSearchCondition in project hive by apache.
the class HiveHBaseTableInputFormat method setupTimeRange.
private void setupTimeRange(Scan scan, List<IndexSearchCondition> conditions) throws IOException {
  long start = 0;
  long end = Long.MAX_VALUE;
  for (IndexSearchCondition sc : conditions) {
    long timestamp = getTimestampVal(sc);
    String comparisonOp = sc.getComparisonOp();
    // HBase time ranges are [start, end), so equality and "<=" bump the exclusive end by one.
    if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual".equals(comparisonOp)) {
      start = timestamp;
      end = timestamp + 1;
    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan".equals(comparisonOp)) {
      end = timestamp;
    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan".equals(comparisonOp)) {
      start = timestamp;
    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan".equals(comparisonOp)) {
      start = timestamp + 1;
    } else if ("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan".equals(comparisonOp)) {
      end = timestamp + 1;
    } else {
      throw new IOException(comparisonOp + " is not a supported comparison operator");
    }
  }
  scan.setTimeRange(start, end);
}
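For context, a minimal sketch of how a conditions list like the one consumed by setupTimeRange could be produced with IndexPredicateAnalyzer. The column name "__timestamp" and the filterExpr/scan variables are illustrative assumptions, not the actual HiveHBaseTableInputFormat wiring.

// Hedged sketch: building IndexSearchConditions restricted to a timestamp column.
// "__timestamp", filterExpr and scan are assumptions for illustration only.
IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual");
analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan");
analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan");
analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan");
analyzer.addComparisonOp("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan");
analyzer.clearAllowedColumnNames();
analyzer.allowColumnName("__timestamp");
List<IndexSearchCondition> conditions = new ArrayList<IndexSearchCondition>();
analyzer.analyzePredicate(filterExpr, conditions); // filterExpr: the pushed-down ExprNodeDesc
setupTimeRange(scan, conditions);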
use of org.apache.hadoop.hive.ql.index.IndexSearchCondition in project cdap by caskdata.
the class HiveStreamInputFormat method setupBuilder.
/**
 * Sets up the given {@link StreamInputSplitFinder.Builder} by analyzing the query.
 */
private StreamInputSplitFinder.Builder setupBuilder(Configuration conf, StreamConfig streamConfig, StreamInputSplitFinder.Builder builder) {
  // The conf contains a 'hive.io.filter.expr.serialized' key holding the serialized form of the ExprNodeDesc
  long startTime = Math.max(0L, System.currentTimeMillis() - streamConfig.getTTL());
  long endTime = System.currentTimeMillis();
  String serializedExpr = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (serializedExpr == null) {
    return builder.setStartTime(startTime).setEndTime(endTime);
  }
  try {
    ExprNodeGenericFuncDesc expr = HiveUtilities.deserializeExpression(serializedExpr, conf);
    // Analyze the query to extract predicates that can be used for indexing (i.e. setting start/end time)
    IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
    for (CompareOp op : CompareOp.values()) {
      analyzer.addComparisonOp(op.getOpClassName());
    }
    // A stream can only be indexed by timestamp
    analyzer.clearAllowedColumnNames();
    analyzer.allowColumnName("ts");
    List<IndexSearchCondition> conditions = Lists.newArrayList();
    analyzer.analyzePredicate(expr, conditions);
    for (IndexSearchCondition condition : conditions) {
      CompareOp op = CompareOp.from(condition.getComparisonOp());
      if (op == null) {
        // Not a supported operation
        continue;
      }
      ExprNodeConstantDesc value = condition.getConstantDesc();
      if (value == null || !(value.getValue() instanceof Long)) {
        // Not a supported value
        continue;
      }
      long timestamp = (Long) value.getValue();
      // If there is an equality condition, set both start and end time; no need to inspect further
      if (op == CompareOp.EQUAL) {
        startTime = timestamp;
        endTime = (timestamp < Long.MAX_VALUE) ? timestamp + 1L : timestamp;
        break;
      }
      if (op == CompareOp.GREATER || op == CompareOp.EQUAL_OR_GREATER) {
        // Plus 1 for the start time on a strict greater-than, since start time is inclusive in a stream
        startTime = Math.max(startTime, timestamp + (timestamp < Long.MAX_VALUE && op == CompareOp.GREATER ? 1L : 0L));
      } else {
        // Plus 1 for the end time on a less-than-or-equal, since end time is exclusive in a stream
        endTime = Math.min(endTime, timestamp + (timestamp < Long.MAX_VALUE && op == CompareOp.EQUAL_OR_LESS ? 1L : 0L));
      }
    }
  } catch (Throwable t) {
    LOG.warn("Exception analyzing query predicate. A full table scan will be performed.", t);
  }
  return builder.setStartTime(startTime).setEndTime(endTime);
}
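The CompareOp helper used above is CDAP-specific and not shown here. A hypothetical sketch of what such an enum could look like, mapping Hive UDF class names to the operations referenced above (the actual CDAP class may differ):

// Hedged sketch of a CompareOp-style helper; an assumption for illustration, not the CDAP source.
public enum CompareOp {
  EQUAL("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual"),
  GREATER("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan"),
  EQUAL_OR_GREATER("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan"),
  LESS("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan"),
  EQUAL_OR_LESS("org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan");

  private final String opClassName;

  CompareOp(String opClassName) {
    this.opClassName = opClassName;
  }

  /** Returns the Hive UDF class name this operation matches. */
  public String getOpClassName() {
    return opClassName;
  }

  /** Returns the CompareOp for the given UDF class name, or null if it is not supported. */
  public static CompareOp from(String opClassName) {
    for (CompareOp op : values()) {
      if (op.opClassName.equals(opClassName)) {
        return op;
      }
    }
    return null;
  }
}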
use of org.apache.hadoop.hive.ql.index.IndexSearchCondition in project mongo-hadoop by mongodb.
the class MongoStorageHandler method decomposePredicate.
@Override
public DecomposedPredicate decomposePredicate(final JobConf jobConf, final Deserializer deserializer, final ExprNodeDesc predicate) {
  BSONSerDe serde = (BSONSerDe) deserializer;
  // Create a new analyzer capable of handling equality and general
  // binary comparisons (false = "more than just equality").
  // TODO: The analyzer is only capable of handling binary comparison
  // expressions, but we could push down more than that in the future by
  // writing our own analyzer.
  IndexPredicateAnalyzer analyzer = IndexPredicateAnalyzer.createAnalyzer(false);
  // The predicate may contain any column.
  for (String colName : serde.columnNames) {
    analyzer.allowColumnName(colName);
  }
  List<IndexSearchCondition> searchConditions = new LinkedList<IndexSearchCondition>();
  ExprNodeDesc residual = analyzer.analyzePredicate(predicate, searchConditions);
  DecomposedPredicate decomposed = new DecomposedPredicate();
  decomposed.pushedPredicate = analyzer.translateSearchConditions(searchConditions);
  decomposed.residualPredicate = (ExprNodeGenericFuncDesc) residual;
  return decomposed;
}
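At scan time, the pushed predicate typically comes back serialized in the job conf under TableScanDesc.FILTER_EXPR_CONF_STR, as the CDAP example above reads it. A hedged sketch of recovering the search conditions on the read side; the deserialization helper is version-dependent (SerializationUtilities in newer Hive, Utilities in older releases), so treat that call as an assumption:

// Hedged sketch: re-deriving search conditions from the pushed-down predicate.
// SerializationUtilities.deserializeExpression is assumed (Hive 2.x); older Hive uses Utilities.
String serialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
if (serialized != null) {
  ExprNodeGenericFuncDesc filterExpr = SerializationUtilities.deserializeExpression(serialized);
  IndexPredicateAnalyzer analyzer = IndexPredicateAnalyzer.createAnalyzer(false);
  for (String colName : serde.columnNames) {
    analyzer.allowColumnName(colName);
  }
  List<IndexSearchCondition> searchConditions = new LinkedList<IndexSearchCondition>();
  analyzer.analyzePredicate(filterExpr, searchConditions);
  // searchConditions now mirrors what decomposePredicate pushed down
}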
use of org.apache.hadoop.hive.ql.index.IndexSearchCondition in project hive by apache.
the class AccumuloPredicateHandler method getSearchConditions.
/**
 * @param conf
 *          Configuration
 * @return list of IndexSearchConditions from the filter expression.
 */
public List<IndexSearchCondition> getSearchConditions(Configuration conf) {
  final List<IndexSearchCondition> sConditions = Lists.newArrayList();
  ExprNodeDesc filterExpr = getExpression(conf);
  if (null == filterExpr) {
    return sConditions;
  }
  IndexPredicateAnalyzer analyzer = newAnalyzer(conf);
  ExprNodeDesc residual = analyzer.analyzePredicate(filterExpr, sConditions);
  if (residual != null) {
    throw new RuntimeException("Unexpected residual predicate: " + residual.getExprString());
  }
  return sConditions;
}
use of org.apache.hadoop.hive.ql.index.IndexSearchCondition in project hive by apache.
the class AccumuloPredicateHandler method getIterators.
/**
 * Loop through the search conditions and build iterator settings for predicates involving columns
 * other than the rowID, if any.
 *
 * @param conf
 *          Configuration
 * @param columnMapper
 *          Mapping of Hive columns to Accumulo columns for the table
 * @throws SerDeException
 */
public List<IteratorSetting> getIterators(Configuration conf, ColumnMapper columnMapper) throws SerDeException {
  List<IteratorSetting> itrs = Lists.newArrayList();
  boolean shouldPushdown = conf.getBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, AccumuloSerDeParameters.ITERATOR_PUSHDOWN_DEFAULT);
  if (!shouldPushdown) {
    LOG.info("Iterator pushdown is disabled for this table");
    return itrs;
  }
  boolean binaryEncodedRow = ColumnEncoding.BINARY.getName().equalsIgnoreCase(conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE));
  int rowIdOffset = columnMapper.getRowIdOffset();
  String[] hiveColumnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS);
  if (null == hiveColumnNamesArr) {
    throw new IllegalArgumentException("Could not find Hive columns in configuration");
  }
  String hiveRowIdColumnName = null;
  if (rowIdOffset >= 0 && rowIdOffset < hiveColumnNamesArr.length) {
    hiveRowIdColumnName = hiveColumnNamesArr[rowIdOffset];
  }
  List<String> hiveColumnNames = Arrays.asList(hiveColumnNamesArr);
  for (IndexSearchCondition sc : getSearchConditions(conf)) {
    String col = sc.getColumnDesc().getColumn();
    // Skip the rowID column; only non-rowID predicates become scan iterators
    if (hiveRowIdColumnName == null || !hiveRowIdColumnName.equals(col)) {
      HiveAccumuloColumnMapping mapping = (HiveAccumuloColumnMapping) columnMapper.getColumnMappingForHiveColumn(hiveColumnNames, col);
      itrs.add(toSetting(mapping, sc, binaryEncodedRow));
    }
  }
  LOG.info("num iterators = " + itrs.size());
  return itrs;
}
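A brief usage sketch: the returned IteratorSetting objects are attached to an Accumulo scanner before the scan runs. The predicateHandler and connector variables, the table name, and the authorizations below are illustrative assumptions:

// Hedged sketch: applying the generated iterator settings to an Accumulo Scanner.
// 'predicateHandler', 'connector', the table name and authorizations are assumptions.
List<IteratorSetting> iterators = predicateHandler.getIterators(conf, columnMapper);
Scanner scanner = connector.createScanner("hive_accumulo_table", new Authorizations());
for (IteratorSetting setting : iterators) {
  scanner.addScanIterator(setting);
}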