use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class ProjectionProducerVisitor method enterCompositeTransform.
/**
 * Decides whether traversal should descend into a composite transform, recording a pushdown
 * opportunity when the composite itself can produce a projection.
 *
 * <p>A composite is recorded (and not entered) only when it has no inputs, is a
 * {@code ProjectionProducer} that reports support for projection pushdown, and at least one of
 * its outputs has a known field-access descriptor that does not request all fields.
 *
 * @param node the composite node being visited
 * @return {@code DO_NOT_ENTER_TRANSFORM} when the node has inputs or when an opportunity was
 *     recorded for it; {@code ENTER_TRANSFORM} otherwise
 */
@Override
public CompositeBehavior enterCompositeTransform(Node node) {
  PTransform<?, ?> candidate = node.getTransform();

  // TODO(BEAM-13658) Support inputs other than PBegin.
  boolean hasInputs = !node.getInputs().isEmpty();
  if (hasInputs) {
    return CompositeBehavior.DO_NOT_ENTER_TRANSFORM;
  }

  if (!(candidate instanceof ProjectionProducer)) {
    return CompositeBehavior.ENTER_TRANSFORM;
  }
  ProjectionProducer<PTransform<?, ?>> producer =
      (ProjectionProducer<PTransform<?, ?>>) candidate;
  if (!producer.supportsProjectionPushdown()) {
    return CompositeBehavior.ENTER_TRANSFORM;
  }

  // Keep only the outputs whose consumers read a strict subset of the fields.
  ImmutableMap.Builder<PCollection<?>, FieldAccessDescriptor> narrowed = ImmutableMap.builder();
  for (PCollection<?> produced : node.getOutputs().values()) {
    FieldAccessDescriptor access = pCollectionFieldAccess.get(produced);
    if (access == null) {
      continue;
    }
    if (access.getAllFields()) {
      continue;
    }
    narrowed.put(produced, access);
  }

  Map<PCollection<?>, FieldAccessDescriptor> opportunities = narrowed.build();
  if (!opportunities.isEmpty()) {
    pushdownOpportunities.put(producer, opportunities);
    // If there are nested PushdownProjector implementations, apply only the outermost one.
    return CompositeBehavior.DO_NOT_ENTER_TRANSFORM;
  }
  return CompositeBehavior.ENTER_TRANSFORM;
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class BeamIOPushDownRule method onMatch.
// ~ Methods ----------------------------------------------------------------
/**
 * Attempts to push the projections and/or filters of a Calc down into the IO source beneath it.
 *
 * <p>The matched operands are read as {@code call.rel(0)} (the {@code Calc}) and
 * {@code call.rel(1)} (the {@code BeamIOSourceRel}). The method returns without calling
 * {@code call.transformTo} when:
 *
 * <ul>
 *   <li>the source is already a {@code BeamPushDownIOSourceRel};
 *   <li>any field of the source row type is a {@code RelRecordType} (nested row);
 *   <li>the table does not support projects and its filter is a {@code DefaultTableFilter};
 *   <li>no input fields are used at all;
 *   <li>every Calc filter is in the table filter's not-supported list and the used fields already
 *       cover every field of the source row type;
 *   <li>the final size comparison at the end of the method does not hold.
 * </ul>
 *
 * @param call the rule call carrying the matched Calc and IO source rel
 */
@Override
public void onMatch(RelOptRuleCall call) {
final BeamIOSourceRel ioSourceRel = call.rel(1);
final BeamSqlTable beamSqlTable = ioSourceRel.getBeamSqlTable();
// Nothing to do if push-down was already applied to this source.
if (ioSourceRel instanceof BeamPushDownIOSourceRel) {
return;
}
// Nested rows are not supported at the moment
for (RelDataTypeField field : ioSourceRel.getRowType().getFieldList()) {
if (field.getType() instanceof RelRecordType) {
return;
}
}
final Calc calc = call.rel(0);
final RexProgram program = calc.getProgram();
// projectFilter.left holds the project expressions, projectFilter.right the filter expressions
// (as used by the loops and the constructFilter call below).
final Pair<ImmutableList<RexNode>, ImmutableList<RexNode>> projectFilter = program.split();
final RelDataType calcInputRowType = program.getInputRowType();
// When predicate push-down is not supported - all filters are unsupported.
final BeamSqlTableFilter tableFilter = beamSqlTable.constructFilter(projectFilter.right);
if (!beamSqlTable.supportsProjects().isSupported() && tableFilter instanceof DefaultTableFilter) {
// Either project or filter push-down must be supported by the IO.
return;
}
// LinkedHashSet keeps the field names in insertion order.
Set<String> usedFields = new LinkedHashSet<>();
if (!(tableFilter instanceof DefaultTableFilter) && !beamSqlTable.supportsProjects().isSupported()) {
// When applying standalone filter push-down all fields must be projected by the IO.
// With a single exception: Calc projects all fields (in the same order) and does nothing
// else.
usedFields.addAll(calcInputRowType.getFieldNames());
} else {
// Find all input refs used by projects
for (RexNode project : projectFilter.left) {
findUtilizedInputRefs(calcInputRowType, project, usedFields);
}
// Find all input refs used by filters
// (only the filters that stay in the Calc, i.e. those the table filter reports as not supported).
for (RexNode filter : tableFilter.getNotSupported()) {
findUtilizedInputRefs(calcInputRowType, filter, usedFields);
}
}
if (usedFields.isEmpty()) {
// No need to do push-down for queries like this: "select UPPER('hello')".
return;
}
// Already the most optimal case:
// every Calc filter is in the not-supported list (nothing can be pushed down), and
// the IO only projects fields utilized by the Calc (no field can be dropped).
if (tableFilter.getNotSupported().containsAll(projectFilter.right) && usedFields.containsAll(ioSourceRel.getRowType().getFieldNames())) {
return;
}
// Resolve the used field names against the table schema.
FieldAccessDescriptor resolved = FieldAccessDescriptor.withFieldNames(usedFields);
resolved = resolved.resolve(beamSqlTable.getSchema());
// When canDropCalc holds, the Calc is replaced by the push-down IO rel alone, which takes over
// the Calc's output row type.
if (canDropCalc(program, beamSqlTable.supportsProjects(), tableFilter)) {
call.transformTo(ioSourceRel.createPushDownRel(calc.getRowType(), resolved.getFieldsAccessed().stream().map(FieldDescriptor::getFieldName).collect(Collectors.toList()), tableFilter));
return;
}
// Already the most optimal case (stricter form of the check above):
// the Calc contains exactly the unsupported filters, and
// the IO only projects fields utilised by the Calc.
// NOTE(review): if getNotSupported() returns a List with standard equals semantics, equals
// implies the containsAll check above, so this return looks unreachable - confirm before
// removing.
if (tableFilter.getNotSupported().equals(projectFilter.right) && usedFields.containsAll(ioSourceRel.getRowType().getFieldNames())) {
return;
}
// Build the replacement plan from the resolved fields, the table filter and the Calc's projects.
RelNode result = constructNodesWithPushDown(resolved, call.builder(), ioSourceRel, tableFilter, calc.getRowType(), projectFilter.left);
if (tableFilter.getNotSupported().size() <= projectFilter.right.size() || usedFields.size() < calcInputRowType.getFieldCount()) {
// Smaller Calc programs are indisputably better, as well as IOs with less projected fields.
// We can consider something with the same number of filters.
call.transformTo(result);
}
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class BeamJoinTransforms method getJoinColumn.
/**
 * Converts one operand of a join condition into a {@code FieldAccessDescriptor}.
 *
 * <p>For a {@code SerializableRexInputRef} the descriptor addresses the single field at
 * {@code index - leftRowColumnCount}. Any other node is cast to
 * {@code SerializableRexFieldAccess}; its first index is shifted by {@code leftRowColumnCount}
 * and each remaining index is applied, unshifted, as a further nesting level.
 *
 * @param serializableRexNode the join operand to translate
 * @param leftRowColumnCount offset subtracted from the top-level field index
 * @return a descriptor built from the operand's field ids
 */
private static FieldAccessDescriptor getJoinColumn(SerializableRexNode serializableRexNode, int leftRowColumnCount) {
  if (!(serializableRexNode instanceof SerializableRexInputRef)) {
    // It can only be SerializableFieldAccess.
    List<Integer> path = ((SerializableRexFieldAccess) serializableRexNode).getIndexes();
    FieldAccessDescriptor descriptor =
        FieldAccessDescriptor.withFieldIds(path.get(0) - leftRowColumnCount);
    // Remaining indexes wrap the descriptor one level at a time.
    for (Integer nestedId : path.subList(1, path.size())) {
      descriptor = FieldAccessDescriptor.withFieldIds(descriptor, nestedId);
    }
    return descriptor;
  }

  SerializableRexInputRef ref = (SerializableRexInputRef) serializableRexNode;
  return FieldAccessDescriptor.withFieldIds(ref.getIndex() - leftRowColumnCount);
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class BeamZetaSqlCalcRelTest method testNoFieldAccess.
/**
 * Checks that a query selecting only a literal yields a field-access descriptor that neither
 * requests all fields nor lists any accessed or nested fields, then runs the pipeline.
 */
@Test
public void testNoFieldAccess() throws IllegalAccessException {
  PCollection<Row> result = compile("SELECT 1 FROM KeyValue");

  // Locate the transform that produced the result collection.
  final NodeGetter visitor = new NodeGetter(result);
  pipeline.traverseTopologically(visitor);

  PCollection<Row> producerInput =
      (PCollection<Row>) Iterables.getOnlyElement(visitor.producer.getInputs().values());
  ParDo.MultiOutput<Row, Row> producerParDo =
      (ParDo.MultiOutput<Row, Row>) visitor.producer.getTransform();

  FieldAccessDescriptor access =
      ParDo.getDoFnSchemaInformation(producerParDo.getFn(), producerInput)
          .getFieldAccessDescriptor();

  Assert.assertFalse(access.getAllFields());
  Assert.assertTrue(access.getFieldsAccessed().isEmpty());
  Assert.assertTrue(access.getNestedFieldsAccessed().isEmpty());

  pipeline.run().waitUntilFinish();
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class BeamCalcRelTest method testSingleFieldAccess.
/**
 * Checks that selecting a single column yields a field-access descriptor that references exactly
 * one field, named {@code order_id}, then runs the pipeline.
 */
@Test
public void testSingleFieldAccess() throws IllegalAccessException {
  PCollection<Row> result = compilePipeline("SELECT order_id FROM ORDER_DETAILS_BOUNDED", pipeline);

  // Locate the transform that produced the result collection.
  final NodeGetter visitor = new NodeGetter(result);
  pipeline.traverseTopologically(visitor);

  PCollection<Row> producerInput =
      (PCollection<Row>) Iterables.getOnlyElement(visitor.producer.getInputs().values());
  ParDo.MultiOutput<Row, Row> producerParDo =
      (ParDo.MultiOutput<Row, Row>) visitor.producer.getTransform();

  FieldAccessDescriptor access =
      ParDo.getDoFnSchemaInformation(producerParDo.getFn(), producerInput)
          .getFieldAccessDescriptor();

  Assert.assertTrue(access.referencesSingleField());
  String onlyFieldName = Iterables.getOnlyElement(access.fieldNamesAccessed());
  Assert.assertEquals("order_id", onlyFieldName);

  pipeline.run().waitUntilFinish();
}
Aggregations