Use of org.apache.drill.common.expression.LogicalExpression in project drill by apache.
Class HBasePushFilterIntoScan, method doPushFilterToScan.
protected void doPushFilterToScan(final RelOptRuleCall call, final FilterPrel filter, final ProjectPrel project, final ScanPrel scan, final HBaseGroupScan groupScan, final RexNode condition) {
final LogicalExpression conditionExp = DrillOptiq.toDrill(new DrillParseContext(PrelUtil.getPlannerSettings(call.getPlanner())), scan, condition);
final HBaseFilterBuilder hbaseFilterBuilder = new HBaseFilterBuilder(groupScan, conditionExp);
final HBaseScanSpec newScanSpec = hbaseFilterBuilder.parseTree();
if (newScanSpec == null) {
// No filter pushdown ==> no transformation.
return;
}
final HBaseGroupScan newGroupsScan = new HBaseGroupScan(groupScan.getUserName(), groupScan.getStoragePlugin(), newScanSpec, groupScan.getColumns());
newGroupsScan.setFilterPushedDown(true);
final ScanPrel newScanPrel = ScanPrel.create(scan, filter.getTraitSet(), newGroupsScan, scan.getRowType());
// Depending on whether there is a project in the middle, assign either the new scan or a copy of the project to childRel.
final RelNode childRel = project == null ? newScanPrel : project.copy(project.getTraitSet(), ImmutableList.of((RelNode) newScanPrel));
if (hbaseFilterBuilder.isAllExpressionsConverted()) {
/*
* Since we could convert the entire filter condition expression into an HBase filter,
* we can eliminate the filter operator altogether.
*/
call.transformTo(childRel);
} else {
call.transformTo(filter.copy(filter.getTraitSet(), ImmutableList.of(childRel)));
}
}
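For context on what a builder like HBaseFilterBuilder produces when parseTree succeeds (its internals are not shown in this listing), here is a minimal hypothetical sketch of row-key range narrowing: a predicate on the row key becomes a half-open [startRow, stopRow) interval that HBase enforces server-side. RowKeyRange and its factory methods are illustrative names, not Drill or HBase APIs.

import java.util.Arrays;

final class RowKeyRange {
    final byte[] startRow; // inclusive
    final byte[] stopRow;  // exclusive

    RowKeyRange(byte[] startRow, byte[] stopRow) {
        this.startRow = startRow;
        this.stopRow = stopRow;
    }

    // row_key = key  =>  scan exactly one row: [key, key + 0x00).
    // Appending a single 0x00 byte yields the key's immediate lexicographic
    // successor, so the half-open interval matches only that row.
    static RowKeyRange forEquality(byte[] key) {
        byte[] stop = Arrays.copyOf(key, key.length + 1); // copyOf zero-pads the extra byte
        return new RowKeyRange(key, stop);
    }

    // row_key < key  =>  scan everything before the key: [empty, key)
    static RowKeyRange forLessThan(byte[] key) {
        return new RowKeyRange(new byte[0], key);
    }
}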
Use of org.apache.drill.common.expression.LogicalExpression in project drill by apache.
Class CompareFunctionsProcessor, method visitConvertExpression.
@Override
public Boolean visitConvertExpression(ConvertExpression e, LogicalExpression valueArg) throws RuntimeException {
if (e.getConvertFunction() == ConvertExpression.CONVERT_FROM) {
String encodingType = e.getEncodingType();
int prefixLength = 0;
// CONVERT_FROM(BYTE_SUBSTR(row_key, 1, 8), 'DATE_EPOCH_BE') < DATE '2015-06-17'
if (e.getInput() instanceof FunctionCall) {
// We can prune scan range only for big-endian encoded data
if (!encodingType.endsWith("_BE")) {
return false;
}
FunctionCall call = (FunctionCall) e.getInput();
String functionName = call.getName();
if (!functionName.equalsIgnoreCase("byte_substr")) {
return false;
}
LogicalExpression nameArg = call.args.get(0);
LogicalExpression valueArg1 = call.args.size() >= 2 ? call.args.get(1) : null;
LogicalExpression valueArg2 = call.args.size() >= 3 ? call.args.get(2) : null;
if (!(nameArg instanceof SchemaPath) || !(valueArg1 instanceof IntExpression) || !(valueArg2 instanceof IntExpression)) {
return false;
}
boolean isRowKey = ((SchemaPath) nameArg).getAsUnescapedPath().equals(DrillHBaseConstants.ROW_KEY);
int offset = ((IntExpression) valueArg1).getInt();
if (!isRowKey || (offset != 1)) {
return false;
}
this.path = (SchemaPath) nameArg;
prefixLength = ((IntExpression) valueArg2).getInt();
this.isRowKeyPrefixComparison = true;
return visitRowKeyPrefixConvertExpression(e, prefixLength, valueArg);
}
if (e.getInput() instanceof SchemaPath) {
ByteBuf bb = null;
switch(encodingType) {
case "INT_BE":
case "INT":
case "UINT_BE":
case "UINT":
case "UINT4_BE":
case "UINT4":
if (valueArg instanceof IntExpression && (isEqualityFn || encodingType.startsWith("U"))) {
bb = newByteBuf(4, encodingType.endsWith("_BE"));
bb.writeInt(((IntExpression) valueArg).getInt());
}
break;
case "BIGINT_BE":
case "BIGINT":
case "UINT8_BE":
case "UINT8":
if (valueArg instanceof LongExpression && (isEqualityFn || encodingType.startsWith("U"))) {
bb = newByteBuf(8, encodingType.endsWith("_BE"));
bb.writeLong(((LongExpression) valueArg).getLong());
}
break;
case "FLOAT":
if (valueArg instanceof FloatExpression && isEqualityFn) {
bb = newByteBuf(4, true);
bb.writeFloat(((FloatExpression) valueArg).getFloat());
}
break;
case "DOUBLE":
if (valueArg instanceof DoubleExpression && isEqualityFn) {
bb = newByteBuf(8, true);
bb.writeDouble(((DoubleExpression) valueArg).getDouble());
}
break;
case "TIME_EPOCH":
case "TIME_EPOCH_BE":
if (valueArg instanceof TimeExpression) {
bb = newByteBuf(8, encodingType.endsWith("_BE"));
bb.writeLong(((TimeExpression) valueArg).getTime());
}
break;
case "DATE_EPOCH":
case "DATE_EPOCH_BE":
if (valueArg instanceof DateExpression) {
bb = newByteBuf(8, encodingType.endsWith("_BE"));
bb.writeLong(((DateExpression) valueArg).getDate());
}
break;
case "BOOLEAN_BYTE":
if (valueArg instanceof BooleanExpression) {
bb = newByteBuf(1, false);
bb.writeByte(((BooleanExpression) valueArg).getBoolean() ? 1 : 0);
}
break;
case "DOUBLE_OB":
case "DOUBLE_OBD":
if (valueArg instanceof DoubleExpression) {
bb = newByteBuf(9, true);
PositionedByteRange br = new SimplePositionedMutableByteRange(bb.array(), 0, 9);
if (encodingType.endsWith("_OBD")) {
org.apache.hadoop.hbase.util.OrderedBytes.encodeFloat64(br, ((DoubleExpression) valueArg).getDouble(), Order.DESCENDING);
this.sortOrderAscending = false;
} else {
org.apache.hadoop.hbase.util.OrderedBytes.encodeFloat64(br, ((DoubleExpression) valueArg).getDouble(), Order.ASCENDING);
}
}
break;
case "FLOAT_OB":
case "FLOAT_OBD":
if (valueArg instanceof FloatExpression) {
bb = newByteBuf(5, true);
PositionedByteRange br = new SimplePositionedMutableByteRange(bb.array(), 0, 5);
if (encodingType.endsWith("_OBD")) {
org.apache.hadoop.hbase.util.OrderedBytes.encodeFloat32(br, ((FloatExpression) valueArg).getFloat(), Order.DESCENDING);
this.sortOrderAscending = false;
} else {
org.apache.hadoop.hbase.util.OrderedBytes.encodeFloat32(br, ((FloatExpression) valueArg).getFloat(), Order.ASCENDING);
}
}
break;
case "BIGINT_OB":
case "BIGINT_OBD":
if (valueArg instanceof LongExpression) {
bb = newByteBuf(9, true);
PositionedByteRange br = new SimplePositionedMutableByteRange(bb.array(), 0, 9);
if (encodingType.endsWith("_OBD")) {
org.apache.hadoop.hbase.util.OrderedBytes.encodeInt64(br, ((LongExpression) valueArg).getLong(), Order.DESCENDING);
this.sortOrderAscending = false;
} else {
org.apache.hadoop.hbase.util.OrderedBytes.encodeInt64(br, ((LongExpression) valueArg).getLong(), Order.ASCENDING);
}
}
break;
case "INT_OB":
case "INT_OBD":
if (valueArg instanceof IntExpression) {
bb = newByteBuf(5, true);
PositionedByteRange br = new SimplePositionedMutableByteRange(bb.array(), 0, 5);
if (encodingType.endsWith("_OBD")) {
org.apache.hadoop.hbase.util.OrderedBytes.encodeInt32(br, ((IntExpression) valueArg).getInt(), Order.DESCENDING);
this.sortOrderAscending = false;
} else {
org.apache.hadoop.hbase.util.OrderedBytes.encodeInt32(br, ((IntExpression) valueArg).getInt(), Order.ASCENDING);
}
}
break;
case "UTF8":
// let visitSchemaPath() handle this.
return e.getInput().accept(this, valueArg);
}
if (bb != null) {
this.value = bb.array();
this.path = (SchemaPath) e.getInput();
return true;
}
}
}
return false;
}
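A note on the "_BE" and isEqualityFn guards above: HBase orders row keys by unsigned lexicographic byte comparison, so range pruning is only sound when the value encoding preserves that order. Big-endian encoding preserves it for unsigned values; little-endian does not; and signed big-endian values still break across the sign boundary, which is why the signed cases additionally require isEqualityFn || encodingType.startsWith("U"). A self-contained demonstration, independent of Drill:

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class ByteOrderDemo {
    static byte[] encode(int v, ByteOrder order) {
        return ByteBuffer.allocate(4).order(order).putInt(v).array();
    }

    // unsigned lexicographic comparison, the order HBase uses for row keys
    static int compareBytes(byte[] a, byte[] b) {
        for (int i = 0; i < a.length; i++) {
            int d = (a[i] & 0xFF) - (b[i] & 0xFF);
            if (d != 0) {
                return d;
            }
        }
        return 0;
    }

    public static void main(String[] args) {
        // big-endian agrees with numeric order: 00 00 00 01 < 00 00 01 00 -> prints true
        System.out.println(compareBytes(encode(1, ByteOrder.BIG_ENDIAN), encode(256, ByteOrder.BIG_ENDIAN)) < 0);
        // little-endian does not: 01 00 00 00 > 00 01 00 00 -> prints false
        System.out.println(compareBytes(encode(1, ByteOrder.LITTLE_ENDIAN), encode(256, ByteOrder.LITTLE_ENDIAN)) < 0);
    }
}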
Use of org.apache.drill.common.expression.LogicalExpression in project drill by apache.
Class PruneScanRule, method doOnMatch.
protected void doOnMatch(RelOptRuleCall call, Filter filterRel, Project projectRel, TableScan scanRel) {
final String pruningClassName = getClass().getName();
logger.info("Beginning partition pruning, pruning class: {}", pruningClassName);
Stopwatch totalPruningTime = Stopwatch.createStarted();
final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
PartitionDescriptor descriptor = getPartitionDescriptor(settings, scanRel);
final BufferAllocator allocator = optimizerContext.getAllocator();
final Object selection = getDrillTable(scanRel).getSelection();
MetadataContext metaContext = null;
if (selection instanceof FormatSelection) {
metaContext = ((FormatSelection) selection).getSelection().getMetaContext();
}
RexNode condition = null;
if (projectRel == null) {
condition = filterRel.getCondition();
} else {
// get the filter as if it were below the projection.
condition = RelOptUtil.pushFilterPastProject(filterRel.getCondition(), projectRel);
}
RewriteAsBinaryOperators visitor = new RewriteAsBinaryOperators(true, filterRel.getCluster().getRexBuilder());
condition = condition.accept(visitor);
Map<Integer, String> fieldNameMap = Maps.newHashMap();
List<String> fieldNames = scanRel.getRowType().getFieldNames();
BitSet columnBitset = new BitSet();
BitSet partitionColumnBitSet = new BitSet();
Map<Integer, Integer> partitionMap = Maps.newHashMap();
int relColIndex = 0;
for (String field : fieldNames) {
final Integer partitionIndex = descriptor.getIdIfValid(field);
if (partitionIndex != null) {
fieldNameMap.put(partitionIndex, field);
partitionColumnBitSet.set(partitionIndex);
columnBitset.set(relColIndex);
// mapping between the relColIndex and partitionIndex
partitionMap.put(relColIndex, partitionIndex);
}
relColIndex++;
}
if (partitionColumnBitSet.isEmpty()) {
logger.info("No partition columns are projected from the scan..continue. " + "Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
return;
}
// stop watch to track how long we spend in different phases of pruning
Stopwatch miscTimer = Stopwatch.createUnstarted();
// track how long we spend building the filter tree
miscTimer.start();
FindPartitionConditions c = new FindPartitionConditions(columnBitset, filterRel.getCluster().getRexBuilder());
c.analyze(condition);
RexNode pruneCondition = c.getFinalCondition();
BitSet referencedDirsBitSet = c.getReferencedDirs();
logger.info("Total elapsed time to build and analyze filter tree: {} ms", miscTimer.elapsed(TimeUnit.MILLISECONDS));
miscTimer.reset();
if (pruneCondition == null) {
logger.info("No conditions were found eligible for partition pruning." + "Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
return;
}
// set up the partitions
List<PartitionLocation> newPartitions = Lists.newArrayList();
// total number of partitions
long numTotal = 0;
int batchIndex = 0;
PartitionLocation firstLocation = null;
LogicalExpression materializedExpr = null;
String[] spInfo = null;
int maxIndex = -1;
BitSet matchBitSet = new BitSet();
// Outer loop: iterate over a list of batches of PartitionLocations
for (List<PartitionLocation> partitions : descriptor) {
numTotal += partitions.size();
logger.debug("Evaluating partition pruning for batch {}", batchIndex);
if (batchIndex == 0) {
// save the first location in case everything is pruned
firstLocation = partitions.get(0);
}
final NullableBitVector output = new NullableBitVector(MaterializedField.create("", Types.optional(MinorType.BIT)), allocator);
final VectorContainer container = new VectorContainer();
try {
final ValueVector[] vectors = new ValueVector[descriptor.getMaxHierarchyLevel()];
for (int partitionColumnIndex : BitSets.toIter(partitionColumnBitSet)) {
SchemaPath column = SchemaPath.getSimplePath(fieldNameMap.get(partitionColumnIndex));
MajorType type = descriptor.getVectorType(column, settings);
MaterializedField field = MaterializedField.create(column.getAsUnescapedPath(), type);
ValueVector v = TypeHelper.getNewVector(field, allocator);
v.allocateNew();
vectors[partitionColumnIndex] = v;
container.add(v);
}
// track how long we spend populating partition column vectors
miscTimer.start();
// populate partition vectors.
descriptor.populatePartitionVectors(vectors, partitions, partitionColumnBitSet, fieldNameMap);
logger.info("Elapsed time to populate partitioning column vectors: {} ms within batchIndex: {}", miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex);
miscTimer.reset();
// materialize the expression; only need to do this once
if (batchIndex == 0) {
materializedExpr = materializePruneExpr(pruneCondition, settings, scanRel, container);
if (materializedExpr == null) {
// continue without partition pruning; no need to log anything here since
// materializePruneExpr logs it already
logger.info("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
return;
}
}
output.allocateNew(partitions.size());
// start the timer to evaluate how long we spend in the interpreter evaluation
miscTimer.start();
InterpreterEvaluator.evaluate(partitions.size(), optimizerContext, container, output, materializedExpr);
logger.info("Elapsed time in interpreter evaluation: {} ms within batchIndex: {} with # of partitions : {}", miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex, partitions.size());
miscTimer.reset();
int recordCount = 0;
int qualifiedCount = 0;
if (descriptor.supportsMetadataCachePruning() && partitions.get(0).isCompositePartition()) /* apply single partition check only for composite partitions */
{
// Inner loop: within each batch iterate over the PartitionLocations
for (PartitionLocation part : partitions) {
assert part.isCompositePartition();
if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
newPartitions.add(part);
// Rather than using the PartitionLocation, get the array of partition values for the directories that are
// referenced by the filter since we are not interested in directory references in other parts of the query.
Pair<String[], Integer> p = composePartition(referencedDirsBitSet, partitionMap, vectors, recordCount);
String[] parts = p.getLeft();
int tmpIndex = p.getRight();
maxIndex = Math.max(maxIndex, tmpIndex);
if (spInfo == null) {
// initialization
spInfo = parts;
for (int j = 0; j <= tmpIndex; j++) {
if (parts[j] != null) {
matchBitSet.set(j);
}
}
} else {
// compare the new partition with existing partition
for (int j = 0; j <= tmpIndex; j++) {
// nulls don't match; differing values don't match either
if (parts[j] == null || spInfo[j] == null || !parts[j].equals(spInfo[j])) {
matchBitSet.clear(j);
}
}
}
qualifiedCount++;
}
recordCount++;
}
} else {
// Inner loop: within each batch iterate over the PartitionLocations
for (PartitionLocation part : partitions) {
if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
newPartitions.add(part);
qualifiedCount++;
}
recordCount++;
}
}
logger.debug("Within batch {}: total records: {}, qualified records: {}", batchIndex, recordCount, qualifiedCount);
batchIndex++;
} catch (Exception e) {
logger.warn("Exception while trying to prune partition.", e);
logger.info("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
// continue without partition pruning
return;
} finally {
container.clear();
if (output != null) {
output.clear();
}
}
}
try {
if (newPartitions.size() == numTotal) {
logger.info("No partitions were eligible for pruning");
return;
}
// handle the case where all partitions are filtered out.
boolean canDropFilter = true;
boolean wasAllPartitionsPruned = false;
String cacheFileRoot = null;
if (newPartitions.isEmpty()) {
assert firstLocation != null;
// Add the first non-composite partition location, since execution requires a schema.
// In that case, we must not drop the filter.
newPartitions.add(firstLocation.getPartitionLocationRecursive().get(0));
canDropFilter = false;
// NOTE: with DRILL-4530, the PruneScanRule may be called with only a list of
// directories first and the non-composite partition location will still return
// directories, not files. So, additional processing is done depending on this flag
wasAllPartitionsPruned = true;
logger.info("All {} partitions were pruned; added back a single partition to allow creating a schema", numTotal);
// set the cacheFileRoot appropriately
if (firstLocation.isCompositePartition()) {
cacheFileRoot = descriptor.getBaseTableLocation() + firstLocation.getCompositePartitionPath();
}
}
logger.info("Pruned {} partitions down to {}", numTotal, newPartitions.size());
List<RexNode> conjuncts = RelOptUtil.conjunctions(condition);
List<RexNode> pruneConjuncts = RelOptUtil.conjunctions(pruneCondition);
conjuncts.removeAll(pruneConjuncts);
RexNode newCondition = RexUtil.composeConjunction(filterRel.getCluster().getRexBuilder(), conjuncts, false);
RewriteCombineBinaryOperators reverseVisitor = new RewriteCombineBinaryOperators(true, filterRel.getCluster().getRexBuilder());
condition = condition.accept(reverseVisitor);
pruneCondition = pruneCondition.accept(reverseVisitor);
if (descriptor.supportsMetadataCachePruning() && !wasAllPartitionsPruned) {
// if metadata cache file could potentially be used, then assign a proper cacheFileRoot
int index = -1;
if (!matchBitSet.isEmpty()) {
String path = "";
index = matchBitSet.length() - 1;
for (int j = 0; j < matchBitSet.length(); j++) {
if (!matchBitSet.get(j)) {
// stop at the first index with no match and use the immediate
// previous index
index = j - 1;
break;
}
}
for (int j = 0; j <= index; j++) {
path += "/" + spInfo[j];
}
cacheFileRoot = descriptor.getBaseTableLocation() + path;
}
if (index != maxIndex) {
// if multiple partitions are being selected, we should not drop the filter
// since we are reading the cache file at a parent/ancestor level
canDropFilter = false;
}
}
RelNode inputRel = descriptor.supportsMetadataCachePruning() ? descriptor.createTableScan(newPartitions, cacheFileRoot, wasAllPartitionsPruned, metaContext) : descriptor.createTableScan(newPartitions, wasAllPartitionsPruned);
if (projectRel != null) {
inputRel = projectRel.copy(projectRel.getTraitSet(), Collections.singletonList(inputRel));
}
if (newCondition.isAlwaysTrue() && canDropFilter) {
call.transformTo(inputRel);
} else {
final RelNode newFilter = filterRel.copy(filterRel.getTraitSet(), Collections.singletonList(inputRel));
call.transformTo(newFilter);
}
setPruneStatus(metaContext, PruneStatus.PRUNED);
} catch (Exception e) {
logger.warn("Exception while using the pruned partitions.", e);
} finally {
logger.info("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
}
}
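The spInfo/matchBitSet bookkeeping in doOnMatch computes the deepest directory-level prefix on which every qualifying composite partition agrees; that shared prefix then becomes the cacheFileRoot. A simplified standalone sketch of the same computation, using hypothetical partition values and no Drill types:

import java.util.BitSet;
import java.util.List;

public class CommonPrefixDemo {
    public static void main(String[] args) {
        List<String[]> partitions = List.of(
                new String[] {"1995", "Q1", "jan"},
                new String[] {"1995", "Q1", "feb"},
                new String[] {"1995", "Q2", "apr"});
        String[] spInfo = null;
        BitSet matchBitSet = new BitSet();
        for (String[] parts : partitions) {
            if (spInfo == null) {
                // initialization: every non-null level matches so far
                spInfo = parts;
                for (int j = 0; j < parts.length; j++) {
                    if (parts[j] != null) {
                        matchBitSet.set(j);
                    }
                }
            } else {
                // clear every level where this partition disagrees
                for (int j = 0; j < parts.length; j++) {
                    if (parts[j] == null || spInfo[j] == null || !parts[j].equals(spInfo[j])) {
                        matchBitSet.clear(j);
                    }
                }
            }
        }
        // keep only the contiguous matching prefix, as doOnMatch does
        StringBuilder root = new StringBuilder();
        for (int j = 0; j < matchBitSet.length() && matchBitSet.get(j); j++) {
            root.append('/').append(spInfo[j]);
        }
        System.out.println(root); // prints /1995: Q1 vs Q2 breaks the match at level 1
    }
}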
Use of org.apache.drill.common.expression.LogicalExpression in project drill by apache.
Class HashPrelUtil, method getHashExpression.
/**
* Create a distribution hash expression.
*
* @param fields Distribution fields
* @param rowType Row type
* @return hash expression used to distribute rows on the given fields
*/
public static LogicalExpression getHashExpression(List<DistributionField> fields, RelDataType rowType) {
assert fields.size() > 0;
final List<String> childFields = rowType.getFieldNames();
// If the row type already includes a hash field, there is no need to compute the hash further down
if (childFields.contains(HASH_EXPR_NAME)) {
return new FieldReference(HASH_EXPR_NAME);
}
final List<LogicalExpression> expressions = new ArrayList<LogicalExpression>(childFields.size());
for (int i = 0; i < fields.size(); i++) {
expressions.add(new FieldReference(childFields.get(fields.get(i).getFieldId()), ExpressionPosition.UNKNOWN));
}
final LogicalExpression distSeed = ValueExpressions.getInt(DIST_SEED);
return createHashBasedPartitionExpression(expressions, distSeed, HASH_HELPER_LOGICALEXPRESSION);
}
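createHashBasedPartitionExpression itself is not shown in this listing; a plausible sketch of the expression shape it builds is a fold of the distribution fields into nested seeded hash calls, each level re-seeded by the previous one. The function name hash32 and the seed literal below are placeholders, not confirmed Drill names:

import java.util.List;

public class NestedHashSketch {
    // Builds a string picture of the nested expression, innermost field first.
    static String nestedHashExpr(List<String> fields, String seed) {
        String expr = seed;
        for (String f : fields) {
            expr = "hash32(" + f + ", " + expr + ")"; // each level is seeded by the previous hash
        }
        return expr;
    }

    public static void main(String[] args) {
        // prints: hash32(c, hash32(b, hash32(a, DIST_SEED)))
        System.out.println(nestedHashExpr(List.of("a", "b", "c"), "DIST_SEED"));
    }
}

Threading the seed through every level makes the final value depend on all distribution fields, so any two fragments hashing the same row agree on its target partition.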
Use of org.apache.drill.common.expression.LogicalExpression in project drill by apache.
Class PruneScanRule, method materializePruneExpr.
protected LogicalExpression materializePruneExpr(RexNode pruneCondition, PlannerSettings settings, RelNode scanRel, VectorContainer container) {
// materialize the expression
logger.debug("Attempting to prune {}", pruneCondition);
final LogicalExpression expr = DrillOptiq.toDrill(new DrillParseContext(settings), scanRel, pruneCondition);
final ErrorCollectorImpl errors = new ErrorCollectorImpl();
LogicalExpression materializedExpr = ExpressionTreeMaterializer.materialize(expr, container, errors, optimizerContext.getFunctionRegistry());
// Convert a REQUIRED (non-nullable) result to OPTIONAL so it matches the type of the nullable output vector.
if (materializedExpr.getMajorType().getMode() == TypeProtos.DataMode.REQUIRED) {
materializedExpr = ExpressionTreeMaterializer.convertToNullableType(materializedExpr, materializedExpr.getMajorType().getMinorType(), optimizerContext.getFunctionRegistry(), errors);
}
if (errors.getErrorCount() != 0) {
logger.warn("Failure while materializing expression [{}]. Errors: {}", expr, errors);
return null;
}
return materializedExpr;
}