use of org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter in project beam by apache.
the class MongoDbTable method buildIOReader.
/**
 * Builds a MongoDB read with filter and projection push-down applied.
 *
 * <p>The output schema is obtained by resolving {@code fieldNames} against {@link #getSchema()}.
 * Unless {@code filters} is a {@link DefaultTableFilter}, it is cast to {@code MongoDbFilter} and,
 * when its supported list is non-empty, a predicate built by {@code constructPredicate} is
 * attached to the find query. A non-empty {@code fieldNames} is attached as the projection.
 *
 * @param begin the pipeline start the read is expanded on
 * @param filters table filter; anything other than {@link DefaultTableFilter} is cast to
 *     {@code MongoDbFilter}
 * @param fieldNames names of the fields to project; when empty no projection is set on the query
 * @return rows produced by applying {@code DocumentToRow} with the resolved output schema
 */
@Override
public PCollection<Row> buildIOReader(PBegin begin, BeamSqlTableFilter filters, List<String> fieldNames) {
  final MongoDbIO.Read baseRead =
      MongoDbIO.read().withUri(dbUri).withDatabase(dbName).withCollection(dbCollection);

  // Narrow the table schema down to the requested fields.
  final FieldAccessDescriptor accessedFields =
      FieldAccessDescriptor.withFieldNames(fieldNames).resolve(getSchema());
  final Schema outputSchema = SelectHelpers.getOutputSchema(getSchema(), accessedFields);

  FindQuery query = FindQuery.create();

  // A DefaultTableFilter carries nothing to push down; any other filter is treated as a
  // MongoDbFilter.
  if (!(filters instanceof DefaultTableFilter)) {
    final MongoDbFilter pushDownFilter = (MongoDbFilter) filters;
    if (!pushDownFilter.getSupported().isEmpty()) {
      final Bson predicate = constructPredicate(pushDownFilter.getSupported());
      LOG.info("Pushing down the following filter: " + predicate.toString());
      query = query.withFilters(predicate);
    }
  }

  // Only set a projection when specific fields were requested.
  query = fieldNames.isEmpty() ? query : query.withProjection(fieldNames);

  return baseRead
      .withQueryFn(query)
      .expand(begin)
      .apply(DocumentToRow.withSchema(outputSchema));
}
use of org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter in project beam by apache.
the class BeamIOPushDownRule method onMatch.
// ~ Methods ----------------------------------------------------------------
/**
 * Tries to push the projects and filters of the matched Calc down into the IO source beneath it.
 *
 * <p>The rule leaves the plan untouched when any of the following holds:
 *
 * <ul>
 *   <li>the source is already a {@code BeamPushDownIOSourceRel};
 *   <li>any field of the source row type is a {@code RelRecordType};
 *   <li>the table does not support project push-down and produced a {@link DefaultTableFilter};
 *   <li>no input fields are used;
 *   <li>the unsupported filters already cover all Calc conditions and every source field is used.
 * </ul>
 *
 * <p>Otherwise the Calc is either replaced by a push-down source rel (when {@code canDropCalc}
 * allows it) or by the nodes built by {@code constructNodesWithPushDown}.
 *
 * @param call the rule call; {@code rel(0)} is read as the Calc and {@code rel(1)} as the IO
 *     source
 */
@Override
public void onMatch(RelOptRuleCall call) {
  final BeamIOSourceRel sourceRel = call.rel(1);
  final BeamSqlTable table = sourceRel.getBeamSqlTable();

  // Push-down was already applied to this source.
  if (sourceRel instanceof BeamPushDownIOSourceRel) {
    return;
  }

  // Nested rows are not supported at the moment.
  final boolean hasNestedRow =
      sourceRel.getRowType().getFieldList().stream()
          .anyMatch(field -> field.getType() instanceof RelRecordType);
  if (hasNestedRow) {
    return;
  }

  final Calc calcRel = call.rel(0);
  final RexProgram calcProgram = calcRel.getProgram();
  final Pair<ImmutableList<RexNode>, ImmutableList<RexNode>> split = calcProgram.split();
  final ImmutableList<RexNode> projects = split.left;
  final ImmutableList<RexNode> conditions = split.right;
  final RelDataType inputRowType = calcProgram.getInputRowType();

  // When predicate push-down is not supported, all filters are unsupported.
  final BeamSqlTableFilter pushDownFilter = table.constructFilter(conditions);
  final boolean isDefaultFilter = pushDownFilter instanceof DefaultTableFilter;

  // Either project or filter push-down must be supported by the IO.
  if (!table.supportsProjects().isSupported() && isDefaultFilter) {
    return;
  }

  final Set<String> usedFields = new LinkedHashSet<>();
  if (isDefaultFilter || table.supportsProjects().isSupported()) {
    // Collect every input ref used by the projects ...
    for (RexNode project : projects) {
      findUtilizedInputRefs(inputRowType, project, usedFields);
    }
    // ... and by the filters that stay in the Calc.
    for (RexNode unsupported : pushDownFilter.getNotSupported()) {
      findUtilizedInputRefs(inputRowType, unsupported, usedFields);
    }
  } else {
    // Standalone filter push-down: all fields must be projected by the IO.
    // With a single exception: Calc projects all fields (in the same order) and does nothing
    // else.
    usedFields.addAll(inputRowType.getFieldNames());
  }

  // No need to do push-down for queries like this: "select UPPER('hello')".
  if (usedFields.isEmpty()) {
    return;
  }

  // Nothing to gain: the unsupported filters cover every Calc condition and the IO only
  // projects fields utilized by the Calc.
  if (pushDownFilter.getNotSupported().containsAll(conditions)
      && usedFields.containsAll(sourceRel.getRowType().getFieldNames())) {
    return;
  }

  final FieldAccessDescriptor resolved =
      FieldAccessDescriptor.withFieldNames(usedFields).resolve(table.getSchema());

  if (canDropCalc(calcProgram, table.supportsProjects(), pushDownFilter)) {
    // The Calc can be dropped; the push-down source rel takes over its row type.
    call.transformTo(
        sourceRel.createPushDownRel(
            calcRel.getRowType(),
            resolved.getFieldsAccessed().stream()
                .map(FieldDescriptor::getFieldName)
                .collect(Collectors.toList()),
            pushDownFilter));
    return;
  }

  // IO only projects fields utilised by a calc.
  // NOTE(review): this equals-based check looks subsumed by the containsAll-based check above;
  // confirm before removing.
  if (pushDownFilter.getNotSupported().equals(conditions)
      && usedFields.containsAll(sourceRel.getRowType().getFieldNames())) {
    return;
  }

  final RelNode result =
      constructNodesWithPushDown(
          resolved, call.builder(), sourceRel, pushDownFilter, calcRel.getRowType(), projects);

  // Smaller Calc programs are indisputably better, as well as IOs with fewer projected fields.
  // A result with the same number of filters is still considered. Skip the transformation
  // only when neither improves.
  if (pushDownFilter.getNotSupported().size() > conditions.size()
      && usedFields.size() >= inputRowType.getFieldCount()) {
    return;
  }
  call.transformTo(result);
}
use of org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter in project beam by apache.
the class SchemaIOTableProviderWrapperTest method testBuildIOReader_withProjectionPushdown.
/**
 * Checks that reading through {@code buildIOReader} with a {@link DefaultTableFilter} and only
 * {@code f_long} requested yields rows carrying just that field, with values 0 and 1.
 */
@Test
public void testBuildIOReader_withProjectionPushdown() {
  BeamSqlTable table = new TestSchemaIOTableProviderWrapper().buildBeamSqlTable(testTable);

  // Expected output: a single INT64 field named f_long.
  Schema projectedSchema = Schema.builder().addInt64Field("f_long").build();
  Row firstExpected = Row.withSchema(projectedSchema).addValues(0L).build();
  Row secondExpected = Row.withSchema(projectedSchema).addValues(1L).build();

  PCollection<Row> rows =
      table.buildIOReader(
          pipeline.begin(),
          new DefaultTableFilter(ImmutableList.of()),
          ImmutableList.of("f_long"));

  PAssert.that(rows).containsInAnyOrder(firstExpected, secondExpected);
  pipeline.run();
}
use of org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter in project beam by apache.
the class BigQueryTable method buildIOReader.
/**
 * Builds a BigQuery read with filter and projection push-down applied.
 *
 * <p>When {@code method} is not {@code Method.DIRECT_READ}, push-down is skipped and the call is
 * delegated to {@code buildIOReader(begin)}. Otherwise the read is created for the schema
 * resolved from {@code fieldNames}. Unless {@code filters} is a {@link DefaultTableFilter}, it is
 * cast to {@code BigQueryFilter}, and a non-empty row restriction generated from its supported
 * filters is set on the read. A non-empty {@code fieldNames} is set as the selected fields.
 *
 * @param begin the pipeline start the read is applied to
 * @param filters table filter; anything other than {@link DefaultTableFilter} is cast to
 *     {@code BigQueryFilter}
 * @param fieldNames names of the fields to select; when empty no field selection is set
 * @return the rows produced by applying the configured read to {@code begin}
 */
@Override
public PCollection<Row> buildIOReader(PBegin begin, BeamSqlTableFilter filters, List<String> fieldNames) {
  if (!method.equals(Method.DIRECT_READ)) {
    LOG.info("Predicate/project push-down only available for `DIRECT_READ` method, skipping.");
    return buildIOReader(begin);
  }

  // Narrow the table schema down to the requested fields.
  final FieldAccessDescriptor accessedFields =
      FieldAccessDescriptor.withFieldNames(fieldNames).resolve(getSchema());
  final Schema projectedSchema = SelectHelpers.getOutputSchema(getSchema(), accessedFields);

  TypedRead<Row> read = getBigQueryTypedRead(projectedSchema);

  // A DefaultTableFilter carries nothing to push down; any other filter is treated as a
  // BigQueryFilter.
  if (!(filters instanceof DefaultTableFilter)) {
    final BigQueryFilter pushDownFilter = (BigQueryFilter) filters;
    if (!pushDownFilter.getSupported().isEmpty()) {
      final String restriction =
          generateRowRestrictions(getSchema(), pushDownFilter.getSupported());
      // Only set a restriction when one was actually generated.
      if (!restriction.isEmpty()) {
        LOG.info("Pushing down the following filter: " + restriction);
        read = read.withRowRestriction(restriction);
      }
    }
  }

  // Only select specific fields when some were requested.
  final TypedRead<Row> configuredRead =
      fieldNames.isEmpty() ? read : read.withSelectedFields(fieldNames);

  return begin.apply("Read Input BQ Rows with push-down", configuredRead);
}
Aggregations