Search in sources :

Example 26 with FieldAccessDescriptor

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.

the class ProjectionProducerVisitor method enterCompositeTransform.

@Override
public CompositeBehavior enterCompositeTransform(Node node) {
    PTransform<?, ?> transform = node.getTransform();
    // TODO(BEAM-13658) Support inputs other than PBegin.
    if (!node.getInputs().isEmpty()) {
        return CompositeBehavior.DO_NOT_ENTER_TRANSFORM;
    }
    if (!(transform instanceof ProjectionProducer)) {
        return CompositeBehavior.ENTER_TRANSFORM;
    }
    ProjectionProducer<PTransform<?, ?>> pushdownProjector = (ProjectionProducer<PTransform<?, ?>>) transform;
    if (!pushdownProjector.supportsProjectionPushdown()) {
        return CompositeBehavior.ENTER_TRANSFORM;
    }
    ImmutableMap.Builder<PCollection<?>, FieldAccessDescriptor> builder = ImmutableMap.builder();
    for (PCollection<?> output : node.getOutputs().values()) {
        FieldAccessDescriptor fieldAccess = pCollectionFieldAccess.get(output);
        if (fieldAccess != null && !fieldAccess.getAllFields()) {
            builder.put(output, fieldAccess);
        }
    }
    Map<PCollection<?>, FieldAccessDescriptor> localOpportunities = builder.build();
    if (localOpportunities.isEmpty()) {
        return CompositeBehavior.ENTER_TRANSFORM;
    }
    pushdownOpportunities.put(pushdownProjector, localOpportunities);
    // If there are nested PushdownProjector implementations, apply only the outermost one.
    return CompositeBehavior.DO_NOT_ENTER_TRANSFORM;
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) ProjectionProducer(org.apache.beam.sdk.schemas.ProjectionProducer) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) PTransform(org.apache.beam.sdk.transforms.PTransform)

Example 27 with FieldAccessDescriptor

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.

the class BeamIOPushDownRule method onMatch.

// ~ Methods ----------------------------------------------------------------
@Override
public void onMatch(RelOptRuleCall call) {
    final BeamIOSourceRel ioSourceRel = call.rel(1);
    final BeamSqlTable beamSqlTable = ioSourceRel.getBeamSqlTable();
    if (ioSourceRel instanceof BeamPushDownIOSourceRel) {
        return;
    }
    // Nested rows are not supported at the moment
    for (RelDataTypeField field : ioSourceRel.getRowType().getFieldList()) {
        if (field.getType() instanceof RelRecordType) {
            return;
        }
    }
    final Calc calc = call.rel(0);
    final RexProgram program = calc.getProgram();
    final Pair<ImmutableList<RexNode>, ImmutableList<RexNode>> projectFilter = program.split();
    final RelDataType calcInputRowType = program.getInputRowType();
    // When predicate push-down is not supported - all filters are unsupported.
    final BeamSqlTableFilter tableFilter = beamSqlTable.constructFilter(projectFilter.right);
    if (!beamSqlTable.supportsProjects().isSupported() && tableFilter instanceof DefaultTableFilter) {
        // Either project or filter push-down must be supported by the IO.
        return;
    }
    Set<String> usedFields = new LinkedHashSet<>();
    if (!(tableFilter instanceof DefaultTableFilter) && !beamSqlTable.supportsProjects().isSupported()) {
        // When applying standalone filter push-down all fields must be project by an IO.
        // With a single exception: Calc projects all fields (in the same order) and does nothing
        // else.
        usedFields.addAll(calcInputRowType.getFieldNames());
    } else {
        // Find all input refs used by projects
        for (RexNode project : projectFilter.left) {
            findUtilizedInputRefs(calcInputRowType, project, usedFields);
        }
        // Find all input refs used by filters
        for (RexNode filter : tableFilter.getNotSupported()) {
            findUtilizedInputRefs(calcInputRowType, filter, usedFields);
        }
    }
    if (usedFields.isEmpty()) {
        // No need to do push-down for queries like this: "select UPPER('hello')".
        return;
    }
    // IO only projects fields utilized by a calc.
    if (tableFilter.getNotSupported().containsAll(projectFilter.right) && usedFields.containsAll(ioSourceRel.getRowType().getFieldNames())) {
        return;
    }
    FieldAccessDescriptor resolved = FieldAccessDescriptor.withFieldNames(usedFields);
    resolved = resolved.resolve(beamSqlTable.getSchema());
    if (canDropCalc(program, beamSqlTable.supportsProjects(), tableFilter)) {
        call.transformTo(ioSourceRel.createPushDownRel(calc.getRowType(), resolved.getFieldsAccessed().stream().map(FieldDescriptor::getFieldName).collect(Collectors.toList()), tableFilter));
        return;
    }
    // IO only projects fields utilised by a calc.
    if (tableFilter.getNotSupported().equals(projectFilter.right) && usedFields.containsAll(ioSourceRel.getRowType().getFieldNames())) {
        return;
    }
    RelNode result = constructNodesWithPushDown(resolved, call.builder(), ioSourceRel, tableFilter, calc.getRowType(), projectFilter.left);
    if (tableFilter.getNotSupported().size() <= projectFilter.right.size() || usedFields.size() < calcInputRowType.getFieldCount()) {
        // Smaller Calc programs are indisputably better, as well as IOs with less projected fields.
        // We can consider something with the same number of filters.
        call.transformTo(result);
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) BeamPushDownIOSourceRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamPushDownIOSourceRel) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) RexProgram(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexProgram) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) Calc(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Calc) RelDataType(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataType) RelRecordType(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelRecordType) RelDataTypeField(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.type.RelDataTypeField) BeamSqlTableFilter(org.apache.beam.sdk.extensions.sql.meta.BeamSqlTableFilter) RelNode(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode) BeamSqlTable(org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable) BeamIOSourceRel(org.apache.beam.sdk.extensions.sql.impl.rel.BeamIOSourceRel) DefaultTableFilter(org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter) RexNode(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexNode)

Example 28 with FieldAccessDescriptor

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.

the class BeamJoinTransforms method getJoinColumn.

private static FieldAccessDescriptor getJoinColumn(SerializableRexNode serializableRexNode, int leftRowColumnCount) {
    if (serializableRexNode instanceof SerializableRexInputRef) {
        SerializableRexInputRef inputRef = (SerializableRexInputRef) serializableRexNode;
        return FieldAccessDescriptor.withFieldIds(inputRef.getIndex() - leftRowColumnCount);
    } else {
        // It can only be SerializableFieldAccess.
        List<Integer> indexes = ((SerializableRexFieldAccess) serializableRexNode).getIndexes();
        FieldAccessDescriptor fieldAccessDescriptor = FieldAccessDescriptor.withFieldIds(indexes.get(0) - leftRowColumnCount);
        for (int i = 1; i < indexes.size(); i++) {
            fieldAccessDescriptor = FieldAccessDescriptor.withFieldIds(fieldAccessDescriptor, indexes.get(i));
        }
        return fieldAccessDescriptor;
    }
}
Also used : SerializableRexInputRef(org.apache.beam.sdk.extensions.sql.impl.utils.SerializableRexInputRef) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) SerializableRexFieldAccess(org.apache.beam.sdk.extensions.sql.impl.utils.SerializableRexFieldAccess)

Example 29 with FieldAccessDescriptor

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.

the class BeamZetaSqlCalcRelTest method testNoFieldAccess.

@Test
public void testNoFieldAccess() throws IllegalAccessException {
    String sql = "SELECT 1 FROM KeyValue";
    PCollection<Row> rows = compile(sql);
    final NodeGetter nodeGetter = new NodeGetter(rows);
    pipeline.traverseTopologically(nodeGetter);
    ParDo.MultiOutput<Row, Row> pardo = (ParDo.MultiOutput<Row, Row>) nodeGetter.producer.getTransform();
    PCollection<Row> input = (PCollection<Row>) Iterables.getOnlyElement(nodeGetter.producer.getInputs().values());
    DoFnSchemaInformation info = ParDo.getDoFnSchemaInformation(pardo.getFn(), input);
    FieldAccessDescriptor fieldAccess = info.getFieldAccessDescriptor();
    Assert.assertFalse(fieldAccess.getAllFields());
    Assert.assertTrue(fieldAccess.getFieldsAccessed().isEmpty());
    Assert.assertTrue(fieldAccess.getNestedFieldsAccessed().isEmpty());
    pipeline.run().waitUntilFinish();
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) ParDo(org.apache.beam.sdk.transforms.ParDo) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 30 with FieldAccessDescriptor

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.

the class BeamCalcRelTest method testSingleFieldAccess.

@Test
public void testSingleFieldAccess() throws IllegalAccessException {
    String sql = "SELECT order_id FROM ORDER_DETAILS_BOUNDED";
    PCollection<Row> rows = compilePipeline(sql, pipeline);
    final NodeGetter nodeGetter = new NodeGetter(rows);
    pipeline.traverseTopologically(nodeGetter);
    ParDo.MultiOutput<Row, Row> pardo = (ParDo.MultiOutput<Row, Row>) nodeGetter.producer.getTransform();
    PCollection<Row> input = (PCollection<Row>) Iterables.getOnlyElement(nodeGetter.producer.getInputs().values());
    DoFnSchemaInformation info = ParDo.getDoFnSchemaInformation(pardo.getFn(), input);
    FieldAccessDescriptor fieldAccess = info.getFieldAccessDescriptor();
    Assert.assertTrue(fieldAccess.referencesSingleField());
    Assert.assertEquals("order_id", Iterables.getOnlyElement(fieldAccess.fieldNamesAccessed()));
    pipeline.run().waitUntilFinish();
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) ParDo(org.apache.beam.sdk.transforms.ParDo) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Aggregations

FieldAccessDescriptor (org.apache.beam.sdk.schemas.FieldAccessDescriptor)65 Test (org.junit.Test)49 Row (org.apache.beam.sdk.values.Row)47 Schema (org.apache.beam.sdk.schemas.Schema)42 PCollection (org.apache.beam.sdk.values.PCollection)16 Map (java.util.Map)12 Pipeline (org.apache.beam.sdk.Pipeline)11 ProjectionProducer (org.apache.beam.sdk.schemas.ProjectionProducer)9 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)8 ParDo (org.apache.beam.sdk.transforms.ParDo)5 DoFnSchemaInformation (org.apache.beam.sdk.transforms.DoFnSchemaInformation)4 PBegin (org.apache.beam.sdk.values.PBegin)4 DefaultTableFilter (org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter)3 FieldType (org.apache.beam.sdk.schemas.Schema.FieldType)3 PTransform (org.apache.beam.sdk.transforms.PTransform)3 List (java.util.List)2 Collectors (java.util.stream.Collectors)2 AutoValueSchema (org.apache.beam.sdk.schemas.AutoValueSchema)2 FieldDescriptor (org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor)2 Field (org.apache.beam.sdk.schemas.Schema.Field)2