Search in sources :

Example 1 with DefaultTableFilter

use of org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter in project beam by apache.

the class MongoDbTable method buildIOReader.

/**
 * Creates a MongoDB read for this table, pushing down the supported part of {@code filters}
 * and the requested projection, and converts the resulting documents to rows.
 *
 * @param begin pipeline entry point the read is expanded against
 * @param filters table filter; anything other than a {@link DefaultTableFilter} is cast to
 *     {@code MongoDbFilter}
 * @param fieldNames names of the fields to project; an empty list adds no projection
 * @return rows carrying the schema that results from selecting {@code fieldNames}
 */
@Override
public PCollection<Row> buildIOReader(PBegin begin, BeamSqlTableFilter filters, List<String> fieldNames) {
    final MongoDbIO.Read baseRead = MongoDbIO.read().withUri(dbUri).withDatabase(dbName).withCollection(dbCollection);

    // Schema of the rows once only the requested fields are kept.
    final FieldAccessDescriptor accessedFields = FieldAccessDescriptor.withFieldNames(fieldNames).resolve(getSchema());
    final Schema projectedSchema = SelectHelpers.getOutputSchema(getSchema(), accessedFields);

    FindQuery query = FindQuery.create();

    // A DefaultTableFilter is never cast or pushed down; any other filter is a MongoDbFilter.
    final boolean hasCustomFilter = !(filters instanceof DefaultTableFilter);
    if (hasCustomFilter) {
        final MongoDbFilter pushDownFilter = (MongoDbFilter) filters;
        final boolean nothingSupported = pushDownFilter.getSupported().isEmpty();
        if (!nothingSupported) {
            final Bson predicate = constructPredicate(pushDownFilter.getSupported());
            LOG.info("Pushing down the following filter: " + predicate.toString());
            query = query.withFilters(predicate);
        }
    }

    // Only restrict the returned fields when a projection was requested.
    final boolean hasProjection = !fieldNames.isEmpty();
    if (hasProjection) {
        query = query.withProjection(fieldNames);
    }

    return baseRead.withQueryFn(query).expand(begin).apply(DocumentToRow.withSchema(projectedSchema));
}
Also used : FindQuery(org.apache.beam.sdk.io.mongodb.FindQuery) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) Schema(org.apache.beam.sdk.schemas.Schema) DefaultTableFilter(org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter) MongoDbIO(org.apache.beam.sdk.io.mongodb.MongoDbIO) Bson(org.bson.conversions.Bson)

Example 2 with DefaultTableFilter

use of org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter in project beam by apache.

the class BeamIOPushDownRule method onMatch.

// ~ Methods ----------------------------------------------------------------
/**
 * Tries to rewrite a matched {@code Calc} ({@code call.rel(0)}) over a {@code BeamIOSourceRel}
 * ({@code call.rel(1)}) so that projects and/or filters are pushed down into the IO.
 *
 * <p>The method returns without transforming when the source is already a
 * {@code BeamPushDownIOSourceRel}, when the source row type contains a nested record, when the
 * table supports neither project nor filter push-down, when no input field is used, or when one
 * of the "already optimal" checks below holds. Otherwise it calls {@code call.transformTo} with
 * either a bare push-down source (when {@code canDropCalc} allows it) or the nodes built by
 * {@code constructNodesWithPushDown}.
 *
 * @param call the rule call providing the matched nodes and the builder
 */
@Override
public void onMatch(RelOptRuleCall call) {
    final BeamIOSourceRel ioSourceRel = call.rel(1);
    final BeamSqlTable beamSqlTable = ioSourceRel.getBeamSqlTable();
    // Push-down was already applied to this source; nothing more to do.
    if (ioSourceRel instanceof BeamPushDownIOSourceRel) {
        return;
    }
    // Nested rows are not supported at the moment
    for (RelDataTypeField field : ioSourceRel.getRowType().getFieldList()) {
        if (field.getType() instanceof RelRecordType) {
            return;
        }
    }
    final Calc calc = call.rel(0);
    final RexProgram program = calc.getProgram();
    // Split the program: `left` holds the projects, `right` holds the filters (see the loops below).
    final Pair<ImmutableList<RexNode>, ImmutableList<RexNode>> projectFilter = program.split();
    final RelDataType calcInputRowType = program.getInputRowType();
    // When predicate push-down is not supported - all filters are unsupported.
    final BeamSqlTableFilter tableFilter = beamSqlTable.constructFilter(projectFilter.right);
    if (!beamSqlTable.supportsProjects().isSupported() && tableFilter instanceof DefaultTableFilter) {
        // Either project or filter push-down must be supported by the IO.
        return;
    }
    // LinkedHashSet keeps the fields in the order they were added.
    Set<String> usedFields = new LinkedHashSet<>();
    if (!(tableFilter instanceof DefaultTableFilter) && !beamSqlTable.supportsProjects().isSupported()) {
        // When applying standalone filter push-down all fields must be projected by an IO.
        // With a single exception: Calc projects all fields (in the same order) and does nothing
        // else.
        usedFields.addAll(calcInputRowType.getFieldNames());
    } else {
        // Find all input refs used by projects
        for (RexNode project : projectFilter.left) {
            findUtilizedInputRefs(calcInputRowType, project, usedFields);
        }
        // Find all input refs used by filters
        // (only the filters the IO cannot handle, since those still have to be evaluated after the read)
        for (RexNode filter : tableFilter.getNotSupported()) {
            findUtilizedInputRefs(calcInputRowType, filter, usedFields);
        }
    }
    if (usedFields.isEmpty()) {
        // No need to do push-down for queries like this: "select UPPER('hello')".
        return;
    }
    // Already optimal: every Calc filter is unsupported by the IO, and
    // the IO only projects fields utilized by the Calc.
    if (tableFilter.getNotSupported().containsAll(projectFilter.right) && usedFields.containsAll(ioSourceRel.getRowType().getFieldNames())) {
        return;
    }
    // Resolve the used field names against the table schema.
    FieldAccessDescriptor resolved = FieldAccessDescriptor.withFieldNames(usedFields);
    resolved = resolved.resolve(beamSqlTable.getSchema());
    // When the Calc can be dropped, replace it and the source with a single push-down source
    // that keeps the Calc's output row type.
    if (canDropCalc(program, beamSqlTable.supportsProjects(), tableFilter)) {
        call.transformTo(ioSourceRel.createPushDownRel(calc.getRowType(), resolved.getFieldsAccessed().stream().map(FieldDescriptor::getFieldName).collect(Collectors.toList()), tableFilter));
        return;
    }
    // Already optimal: the unsupported filters are exactly the Calc filters (list equality here,
    // unlike the containsAll check above), and the IO only projects fields utilized by the Calc.
    if (tableFilter.getNotSupported().equals(projectFilter.right) && usedFields.containsAll(ioSourceRel.getRowType().getFieldNames())) {
        return;
    }
    RelNode result = constructNodesWithPushDown(resolved, call.builder(), ioSourceRel, tableFilter, calc.getRowType(), projectFilter.left);
    // Apply the rewrite only when it leaves no more filters than before or projects fewer fields.
    if (tableFilter.getNotSupported().size() <= projectFilter.right.size() || usedFields.size() < calcInputRowType.getFieldCount()) {
        // Smaller Calc programs are indisputably better, as well as IOs with less projected fields.
        // We can consider something with the same number of filters.
        call.transformTo(result);
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) BeamPushDownIOSourceRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamPushDownIOSourceRel) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) RexProgram(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexProgram) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) Calc(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Calc) RelDataType(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataType) RelRecordType(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelRecordType) RelDataTypeField(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataTypeField) BeamSqlTableFilter(org.apache.beam.sdk.extensions.sql.meta.BeamSqlTableFilter) RelNode(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode) BeamSqlTable(org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable) BeamIOSourceRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel) DefaultTableFilter(org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter) RexNode(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexNode)

Example 3 with DefaultTableFilter

use of org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter in project beam by apache.

the class SchemaIOTableProviderWrapperTest method testBuildIOReader_withProjectionPushdown.

/**
 * Reads the test table with a projection on {@code f_long} and no filter, and expects exactly
 * the rows {@code 0L} and {@code 1L} under a single-field schema.
 */
@Test
public void testBuildIOReader_withProjectionPushdown() {
    // Table under test.
    BeamSqlTable table = new TestSchemaIOTableProviderWrapper().buildBeamSqlTable(testTable);

    // Read with an empty default filter and a projection on the single long field.
    DefaultTableFilter emptyFilter = new DefaultTableFilter(ImmutableList.of());
    ImmutableList<String> projectedFields = ImmutableList.of("f_long");
    PCollection<Row> rows = table.buildIOReader(pipeline.begin(), emptyFilter, projectedFields);

    // Expected output: only the projected field remains.
    Schema projectedSchema = Schema.builder().addInt64Field("f_long").build();
    Row firstExpected = Row.withSchema(projectedSchema).addValues(0L).build();
    Row secondExpected = Row.withSchema(projectedSchema).addValues(1L).build();
    PAssert.that(rows).containsInAnyOrder(firstExpected, secondExpected);

    pipeline.run();
}
Also used : BeamSqlTable(org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable) Schema(org.apache.beam.sdk.schemas.Schema) DefaultTableFilter(org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 4 with DefaultTableFilter

use of org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter in project beam by apache.

the class BigQueryTable method buildIOReader.

/**
 * Creates a BigQuery read for this table with filter and projection push-down.
 *
 * <p>Push-down is applied only when the configured method is {@code DIRECT_READ}; for any other
 * method this logs a notice and delegates to {@code buildIOReader(begin)}.
 *
 * @param begin pipeline entry point the read is applied to
 * @param filters table filter; anything other than a {@link DefaultTableFilter} is cast to
 *     {@code BigQueryFilter}
 * @param fieldNames names of the fields to select; an empty list adds no field selection
 * @return the rows produced by the configured read
 */
@Override
public PCollection<Row> buildIOReader(PBegin begin, BeamSqlTableFilter filters, List<String> fieldNames) {
    final boolean directRead = method.equals(Method.DIRECT_READ);
    if (!directRead) {
        LOG.info("Predicate/project push-down only available for `DIRECT_READ` method, skipping.");
        return buildIOReader(begin);
    }

    // Schema of the rows once only the requested fields are kept.
    final FieldAccessDescriptor accessedFields = FieldAccessDescriptor.withFieldNames(fieldNames).resolve(getSchema());
    final Schema projectedSchema = SelectHelpers.getOutputSchema(getSchema(), accessedFields);

    TypedRead<Row> read = getBigQueryTypedRead(projectedSchema);

    // A DefaultTableFilter is never cast or pushed down; any other filter is a BigQueryFilter.
    final boolean hasCustomFilter = !(filters instanceof DefaultTableFilter);
    if (hasCustomFilter) {
        final BigQueryFilter pushDownFilter = (BigQueryFilter) filters;
        final boolean nothingSupported = pushDownFilter.getSupported().isEmpty();
        if (!nothingSupported) {
            final String restriction = generateRowRestrictions(getSchema(), pushDownFilter.getSupported());
            // An empty restriction string is not pushed down.
            if (!restriction.isEmpty()) {
                LOG.info("Pushing down the following filter: " + restriction);
                read = read.withRowRestriction(restriction);
            }
        }
    }

    // Only restrict the selected fields when a projection was requested.
    final boolean hasProjection = !fieldNames.isEmpty();
    if (hasProjection) {
        read = read.withSelectedFields(fieldNames);
    }

    return begin.apply("Read Input BQ Rows with push-down", read);
}
Also used : FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) Schema(org.apache.beam.sdk.schemas.Schema) DefaultTableFilter(org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter) Row(org.apache.beam.sdk.values.Row)

Aggregations

DefaultTableFilter (org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter)4 FieldAccessDescriptor (org.apache.beam.sdk.schemas.FieldAccessDescriptor)3 Schema (org.apache.beam.sdk.schemas.Schema)3 BeamSqlTable (org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable)2 Row (org.apache.beam.sdk.values.Row)2 LinkedHashSet (java.util.LinkedHashSet)1 BeamIOSourceRel (org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel)1 BeamPushDownIOSourceRel (org.apache.beam.sdk.extensions.sql.impl.rel.BeamPushDownIOSourceRel)1 BeamSqlTableFilter (org.apache.beam.sdk.extensions.sql.meta.BeamSqlTableFilter)1 FindQuery (org.apache.beam.sdk.io.mongodb.FindQuery)1 MongoDbIO (org.apache.beam.sdk.io.mongodb.MongoDbIO)1 ImmutableList (org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList)1 RelNode (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode)1 Calc (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Calc)1 RelDataType (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataType)1 RelDataTypeField (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataTypeField)1 RelRecordType (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelRecordType)1 RexNode (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexNode)1 RexProgram (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexProgram)1 Bson (org.bson.conversions.Bson)1