use of org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel in project beam by apache.
the class BeamAggregateProjectMergeRuleTest method testBeamAggregateProjectMergeRule_withNoneTable.
@Test
public void testBeamAggregateProjectMergeRule_withNoneTable() {
  // When an IO does not support project push-down, the Project should be merged into the
  // aggregate, leaving the aggregate sitting directly on top of the IO.
  BeamRelNode planRoot =
      sqlEnv.parseQuery("select SUM(id) as id_sum from TEST_NONE group by name");

  // The casts double as structural checks: root input is the aggregate, whose input is the IO.
  BeamAggregationRel aggregationRel = (BeamAggregationRel) planRoot.getInput(0);
  BeamIOSourceRel sourceRel = (BeamIOSourceRel) aggregationRel.getInput();

  // The aggregate exposes both the summed column and the grouping key.
  assertThat(
      aggregationRel.getRowType().getFieldNames(), containsInAnyOrder("id_sum", "name"));

  // The IO still projects all fields, including the ones the query never reads.
  assertThat(sourceRel, instanceOf(BeamIOSourceRel.class));
  assertThat(
      sourceRel.getRowType().getFieldNames(),
      containsInAnyOrder("unused1", "name", "id", "unused2"));
}
use of org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel in project beam by apache.
the class BeamAggregateProjectMergeRule method onMatch.
/**
 * Delegates to the parent rule's {@code onMatch} unless an IO that supports project push-down is
 * found beneath the matched {@code Project}.
 *
 * @param call the rule call; operand 1 is read as the matched {@code Project}.
 */
@Override
public void onMatch(RelOptRuleCall call) {
  final Project matchedProject = call.rel(1);
  final BeamIOSourceRel underlyingIo = getUnderlyingIO(new HashSet<>(), matchedProject);

  // Skip the merge when an underlying IO exists and reports project push-down as supported.
  if (underlyingIo != null && underlyingIo.getBeamSqlTable().supportsProjects().isSupported()) {
    return;
  }

  // Either no IO was found or it cannot push projects down: let the parent rule do the merge.
  super.onMatch(call);
}
use of org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel in project beam by apache.
the class BeamIOPushDownRule method constructNodesWithPushDown.
/**
 * Builds the replacement sub-plan for a {@code Calc} over an IO: a new {@link BeamIOSourceRel}
 * obtained from {@code createPushDownRel}, topped with a {@code Filter} for the predicates the IO
 * does not support and a {@code Project} that restores the original {@code Calc} output (field
 * reordering, field duplication, complex projects).
 *
 * @param resolved A descriptor of fields used by a {@code Calc}.
 * @param relBuilder A {@code RelBuilder} for constructing {@code Project} and {@code Filter} Rel
 *     nodes with operations unsupported by the IO.
 * @param ioSourceRel Original {@code BeamIOSourceRel} we are attempting to perform push-down for.
 * @param tableFilter A class containing information about IO predicate push-down capabilities.
 * @param calcDataType A Calcite output schema of an original {@code Calc}.
 * @param calcProjects A list of projected {@code RexNode}s by a {@code Calc}.
 * @return An alternative {@code RelNode} with supported filters/projects pushed-down to IO Rel.
 */
private RelNode constructNodesWithPushDown(
    FieldAccessDescriptor resolved,
    RelBuilder relBuilder,
    BeamIOSourceRel ioSourceRel,
    BeamSqlTableFilter tableFilter,
    RelDataType calcDataType,
    List<RexNode> calcProjects) {
  // Schema/row type of the IO once it is restricted to the accessed fields.
  Schema pushedSchema =
      SelectHelpers.getOutputSchema(ioSourceRel.getBeamSqlTable().getSchema(), resolved);
  RelDataType pushedRowType =
      CalciteUtils.toCalciteRowType(pushedSchema, ioSourceRel.getCluster().getTypeFactory());

  BeamIOSourceRel pushDownRel =
      ioSourceRel.createPushDownRel(pushedRowType, pushedSchema.getFieldNames(), tableFilter);
  relBuilder.push(pushDownRel);

  // Position i of this list holds the original field index of the i-th accessed field.
  // Example: original fields {0:unused1, 1:id, 2:name, 3:unused2} with only 'id' and 'name' in
  // use give a new input row type {0:id, 1:name} and the mapping {0:1, 1:2}.
  List<Integer> mapping =
      resolved.getFieldsAccessed().stream()
          .map(FieldDescriptor::getFieldId)
          .collect(Collectors.toList());

  // Re-target the unsupported filters, then the projects, at the new input row type.
  List<RexNode> remappedFilters =
      tableFilter.getNotSupported().stream()
          .map(filter -> reMapRexNodeToNewInputs(filter, mapping))
          .collect(Collectors.toList());
  List<RexNode> remappedProjects =
      calcProjects.stream()
          .map(project -> reMapRexNodeToNewInputs(project, mapping))
          .collect(Collectors.toList());

  boolean identityProjection = RexUtil.isIdentity(remappedProjects, pushDownRel.getRowType());
  if (identityProjection) {
    // Force a rename prior to filter for identity function.
    relBuilder.project(remappedProjects, calcDataType.getFieldNames(), true);
  }

  relBuilder.filter(remappedFilters);
  relBuilder.project(remappedProjects, calcDataType.getFieldNames());
  return relBuilder.build();
}
use of org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel in project beam by apache.
the class BeamAggregateProjectMergeRule method getUnderlyingIO.
/**
 * Searches below {@code parent} for a {@code BeamIOSourceRel}, descending only through {@code
 * Project} and {@code Filter} nodes. Following scenarios are possible:
 *
 * <ol>
 *   <li>{@code Aggregate <- Project <- IO}.
 *   <li>{@code Aggregate <- Project <- Chain of Project/Filter <- IO}.
 *   <li>{@code Aggregate <- Project <- Something else}.
 *   <li>{@code Aggregate <- Project <- Chain of Project/Filter <- Something else}.
 * </ol>
 *
 * @param visitedNodes nodes already explored by this search; {@code parent} is added to it.
 * @param parent project that matched this rule (or, on recursion, a nested project/filter).
 * @return {@code BeamIOSourceRel} when it is present or null when some other {@code RelNode} is
 *     present.
 */
private BeamIOSourceRel getUnderlyingIO(Set<RelNode> visitedNodes, SingleRel parent) {
  // Set.add reports false for an element that is already present: such a node was explored
  // before, so there is no need to look at it again.
  if (!visitedNodes.add(parent)) {
    return null;
  }

  RelSubset inputSubset = (RelSubset) parent.getInput();
  for (RelNode candidate : inputSubset.getRelList()) {
    boolean projectOrFilter = candidate instanceof Filter || candidate instanceof Project;
    if (!projectOrFilter) {
      if (candidate instanceof BeamIOSourceRel) {
        return (BeamIOSourceRel) candidate;
      }
      // Any other kind of node is neither returned nor searched.
      continue;
    }

    // Search the inputs of the nested project/filter for an IO.
    BeamIOSourceRel found = getUnderlyingIO(visitedNodes, (SingleRel) candidate);
    if (found != null) {
      return found;
    }
  }
  return null;
}
use of org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel in project beam by apache.
the class BeamIOPushDownRule method onMatch.
// ~ Methods ----------------------------------------------------------------
/**
 * Attempts to replace a {@code Calc} over a {@code BeamIOSourceRel} with an equivalent plan in
 * which projects and/or filters are pushed down into the IO.
 *
 * @param call the rule call; operand 0 is read as the {@code Calc} and operand 1 as the IO.
 */
@Override
public void onMatch(RelOptRuleCall call) {
  final BeamIOSourceRel ioSourceRel = call.rel(1);
  final BeamSqlTable beamSqlTable = ioSourceRel.getBeamSqlTable();

  // IO nodes that are already push-down rels are not processed again.
  if (ioSourceRel instanceof BeamPushDownIOSourceRel) {
    return;
  }

  // Nested rows are not supported at the moment.
  boolean hasNestedRow =
      ioSourceRel.getRowType().getFieldList().stream()
          .anyMatch(field -> field.getType() instanceof RelRecordType);
  if (hasNestedRow) {
    return;
  }

  final Calc calc = call.rel(0);
  final RexProgram program = calc.getProgram();
  final Pair<ImmutableList<RexNode>, ImmutableList<RexNode>> projectFilter = program.split();
  final ImmutableList<RexNode> calcProjects = projectFilter.left;
  final ImmutableList<RexNode> calcFilters = projectFilter.right;
  final RelDataType calcInputRowType = program.getInputRowType();

  // When predicate push-down is not supported - all filters are unsupported.
  final BeamSqlTableFilter tableFilter = beamSqlTable.constructFilter(calcFilters);

  // Either project or filter push-down must be supported by the IO.
  if (!beamSqlTable.supportsProjects().isSupported()
      && tableFilter instanceof DefaultTableFilter) {
    return;
  }

  final Set<String> usedFields = new LinkedHashSet<>();
  if (tableFilter instanceof DefaultTableFilter
      || beamSqlTable.supportsProjects().isSupported()) {
    // Collect every input ref used by the projects, then by the filters the IO cannot handle.
    calcProjects.forEach(
        project -> findUtilizedInputRefs(calcInputRowType, project, usedFields));
    tableFilter
        .getNotSupported()
        .forEach(filter -> findUtilizedInputRefs(calcInputRowType, filter, usedFields));
  } else {
    // Standalone filter push-down: all fields must be projected by the IO.
    // With a single exception: Calc projects all fields (in the same order) and does nothing
    // else.
    usedFields.addAll(calcInputRowType.getFieldNames());
  }

  // No need to do push-down for queries like this: "select UPPER('hello')".
  if (usedFields.isEmpty()) {
    return;
  }

  // Nothing to gain: every Calc filter is unsupported by the IO and every IO field is in use.
  if (tableFilter.getNotSupported().containsAll(calcFilters)
      && usedFields.containsAll(ioSourceRel.getRowType().getFieldNames())) {
    return;
  }

  final FieldAccessDescriptor resolved =
      FieldAccessDescriptor.withFieldNames(usedFields).resolve(beamSqlTable.getSchema());

  // When the Calc can be dropped, the push-down IO alone replaces it and takes its row type.
  if (canDropCalc(program, beamSqlTable.supportsProjects(), tableFilter)) {
    call.transformTo(
        ioSourceRel.createPushDownRel(
            calc.getRowType(),
            resolved.getFieldsAccessed().stream()
                .map(FieldDescriptor::getFieldName)
                .collect(Collectors.toList()),
            tableFilter));
    return;
  }

  // Same "already optimal" test as above, but requiring the unsupported filters to equal the
  // Calc filters.
  // NOTE(review): list equality implies the earlier containsAll(...) guard, so this return looks
  // unreachable - confirm getNotSupported() is stable across calls before removing it.
  if (tableFilter.getNotSupported().equals(calcFilters)
      && usedFields.containsAll(ioSourceRel.getRowType().getFieldNames())) {
    return;
  }

  final RelNode result =
      constructNodesWithPushDown(
          resolved, call.builder(), ioSourceRel, tableFilter, calc.getRowType(), calcProjects);

  // Smaller Calc programs are indisputably better, as well as IOs with less projected fields.
  // We can consider something with the same number of filters.
  if (tableFilter.getNotSupported().size() <= calcFilters.size()
      || usedFields.size() < calcInputRowType.getFieldCount()) {
    call.transformTo(result);
  }
}
Aggregations