use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class BeamSideInputJoinRel method sideInputJoin.
/**
 * Joins {@code leftRows} and {@code rightRows} with a broadcast join on the given key fields.
 *
 * <p>If {@code leftRows} is bounded, the two inputs (and their key descriptors) are exchanged
 * before the join is applied. The final {@code Select} then lists the join's RHS columns before
 * its LHS columns, so the output keeps the caller's left-then-right column order.
 *
 * @param leftRows rows of the left join input
 * @param rightRows rows of the right join input
 * @param leftKeyFields join key fields of {@code leftRows}
 * @param rightKeyFields join key fields of {@code rightRows}
 * @return the joined rows, selected into the schema derived from {@code getRowType()}
 * @throws RuntimeException if the join type to execute is neither INNER nor LEFT
 */
public PCollection<Row> sideInputJoin(PCollection<Row> leftRows, PCollection<Row> rightRows, FieldAccessDescriptor leftKeyFields, FieldAccessDescriptor rightKeyFields) {
    // A bounded left input means the sides must trade places for the join itself.
    final boolean swapped = PCollection.IsBounded.BOUNDED == leftRows.isBounded();

    final JoinRelType effectiveJoinType;
    if (!swapped || joinType == JoinRelType.INNER) {
        effectiveJoinType = joinType;
    } else {
        // Once the inputs are exchanged, every non-INNER join is executed as a LEFT join;
        // a join that was already LEFT is rejected by the precondition.
        Preconditions.checkArgument(joinType != JoinRelType.LEFT);
        effectiveJoinType = JoinRelType.LEFT;
    }

    final PCollection<Row> mainRows;
    final PCollection<Row> broadcastRows;
    final FieldAccessDescriptor mainKeys;
    final FieldAccessDescriptor broadcastKeys;
    if (swapped) {
        mainRows = rightRows;
        broadcastRows = leftRows;
        mainKeys = rightKeyFields;
        broadcastKeys = leftKeyFields;
    } else {
        mainRows = leftRows;
        broadcastRows = rightRows;
        mainKeys = leftKeyFields;
        broadcastKeys = rightKeyFields;
    }

    final PCollection<Row> joined;
    switch (effectiveJoinType) {
        case INNER:
            joined = mainRows.apply(
                org.apache.beam.sdk.schemas.transforms.Join.<Row, Row>innerBroadcastJoin(broadcastRows)
                    .on(FieldsEqual.left(mainKeys).right(broadcastKeys)));
            break;
        case LEFT:
            joined = mainRows.apply(
                org.apache.beam.sdk.schemas.transforms.Join.<Row, Row>leftOuterBroadcastJoin(broadcastRows)
                    .on(FieldsEqual.left(mainKeys).right(broadcastKeys)));
            break;
        default:
            throw new RuntimeException("Unexpected join type " + effectiveJoinType);
    }

    final Schema outputSchema = CalciteUtils.toSchema(getRowType());
    final String lhsColumns = org.apache.beam.sdk.schemas.transforms.Join.LHS_TAG + ".*";
    final String rhsColumns = org.apache.beam.sdk.schemas.transforms.Join.RHS_TAG + ".*";

    if (swapped) {
        // The caller's left input ended up on the join's RHS, so emit RHS columns first.
        return joined.apply(Select.<Row>fieldNames(rhsColumns, lhsColumns).withOutputSchema(outputSchema));
    }
    return joined.apply(Select.<Row>fieldNames(lhsColumns, rhsColumns).withOutputSchema(outputSchema));
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class BigQueryTable method buildIOReader.
/**
 * Builds a BigQuery read that pushes the given filter and projection down to the source.
 *
 * <p>Push-down is only attempted when {@code method} is {@code DIRECT_READ}; for any other
 * method this delegates to {@code buildIOReader(begin)}.
 *
 * @param begin the pipeline start the read is applied to
 * @param filters the table filter; anything other than a {@code DefaultTableFilter} is cast to
 *     {@code BigQueryFilter}
 * @param fieldNames the fields to project; an empty list skips {@code withSelectedFields}
 * @return the rows produced by applying the configured read to {@code begin}
 */
@Override
public PCollection<Row> buildIOReader(PBegin begin, BeamSqlTableFilter filters, List<String> fieldNames) {
    if (!method.equals(Method.DIRECT_READ)) {
        LOG.info("Predicate/project push-down only available for `DIRECT_READ` method, skipping.");
        return buildIOReader(begin);
    }

    // Narrow the table schema down to the requested fields.
    final FieldAccessDescriptor projection = FieldAccessDescriptor.withFieldNames(fieldNames).resolve(getSchema());
    final Schema projectedSchema = SelectHelpers.getOutputSchema(getSchema(), projection);
    TypedRead<Row> reader = getBigQueryTypedRead(projectedSchema);

    final boolean hasPushdownFilter = !(filters instanceof DefaultTableFilter);
    if (hasPushdownFilter) {
        final BigQueryFilter pushdownFilter = (BigQueryFilter) filters;
        // An empty restriction stands for "nothing to push down".
        final String restriction = pushdownFilter.getSupported().isEmpty()
            ? ""
            : generateRowRestrictions(getSchema(), pushdownFilter.getSupported());
        if (!restriction.isEmpty()) {
            LOG.info("Pushing down the following filter: " + restriction);
            reader = reader.withRowRestriction(restriction);
        }
    }

    final boolean hasProjection = !fieldNames.isEmpty();
    if (hasProjection) {
        reader = reader.withSelectedFields(fieldNames);
    }

    return begin.apply("Read Input BQ Rows with push-down", reader);
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class ProjectionPushdownOptimizerTest method testSimpleProjectionPushdown.
/**
 * Runs the optimizer on a pipeline whose source exposes {@code foo}, {@code bar} and
 * {@code baz} while the downstream transform accesses only {@code foo} and {@code bar}, then
 * checks that the pipeline holds a source built from the narrower descriptor and no longer
 * holds the original one.
 */
@Test
public void testSimpleProjectionPushdown() {
    // The downstream transform accesses only a subset of what the source offers.
    FieldAccessDescriptor accessedFields = FieldAccessDescriptor.withFieldNames("foo", "bar");
    SimpleSourceWithPushdown sourceBeforeOptimization =
        new SimpleSourceWithPushdown(FieldAccessDescriptor.withFieldNames("foo", "bar", "baz"));
    SimpleSourceWithPushdown sourceAfterOptimization = new SimpleSourceWithPushdown(accessedFields);

    Pipeline pipeline = Pipeline.create();
    pipeline.apply(sourceBeforeOptimization).apply(new FieldAccessTransform(accessedFields));

    ProjectionPushdownOptimizer.optimize(pipeline);

    Assert.assertTrue(pipelineHasTransform(pipeline, sourceAfterOptimization));
    Assert.assertFalse(pipelineHasTransform(pipeline, sourceBeforeOptimization));
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class ProjectionProducerVisitorTest method testMissingFieldAccessInformation_returnsNoPushdown.
/**
 * Traverses a pipeline with an empty field-access map and checks that the visitor reports no
 * pushdown opportunities.
 */
@Test
public void testMissingFieldAccessInformation_returnsNoPushdown() {
    Pipeline pipeline = Pipeline.create();
    pipeline.apply(new SimpleSource());

    // No field-access information is supplied for any PCollection.
    Map<PCollection<?>, FieldAccessDescriptor> noFieldAccess = ImmutableMap.of();
    ProjectionProducerVisitor producerVisitor = new ProjectionProducerVisitor(noFieldAccess);
    pipeline.traverseTopologically(producerVisitor);

    Map<ProjectionProducer<PTransform<?, ?>>, Map<PCollection<?>, FieldAccessDescriptor>> opportunities =
        producerVisitor.getPushdownOpportunities();
    Assert.assertTrue(opportunities.isEmpty());
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class ProjectionProducerVisitorTest method testNestedPushdownProducers_returnsOnlyOutermostPushdown.
/**
 * Applies a {@code CompositeTransformWithPushdownOutside} source, supplies field-access
 * information for its output, and checks that the visitor reports exactly one opportunity,
 * keyed by that source, covering {@code field1} and {@code field2}.
 */
@Test
public void testNestedPushdownProducers_returnsOnlyOutermostPushdown() {
    Pipeline pipeline = Pipeline.create();
    PTransform<PBegin, PCollection<Row>> compositeSource = new CompositeTransformWithPushdownOutside();
    PCollection<Row> sourceOutput = pipeline.apply(compositeSource);

    Map<PCollection<?>, FieldAccessDescriptor> fieldAccessByCollection =
        ImmutableMap.of(sourceOutput, FieldAccessDescriptor.withFieldNames("field1", "field2"));
    ProjectionProducerVisitor producerVisitor = new ProjectionProducerVisitor(fieldAccessByCollection);
    pipeline.traverseTopologically(producerVisitor);

    Map<ProjectionProducer<PTransform<?, ?>>, Map<PCollection<?>, FieldAccessDescriptor>> opportunities =
        producerVisitor.getPushdownOpportunities();

    // Exactly one producer is reported, and it is the source applied above.
    Assert.assertEquals(1, opportunities.size());
    Map<PCollection<?>, FieldAccessDescriptor> sourceOpportunities = opportunities.get(compositeSource);
    Assert.assertNotNull(sourceOpportunities);
    Assert.assertEquals(1, sourceOpportunities.size());

    // The reported descriptor names exactly the two accessed fields, not all fields.
    FieldAccessDescriptor pushedDownAccess = sourceOpportunities.get(sourceOutput);
    Assert.assertNotNull(pushedDownAccess);
    Assert.assertFalse(pushedDownAccess.getAllFields());
    assertThat(pushedDownAccess.fieldNamesAccessed(), containsInAnyOrder("field1", "field2"));
}
Aggregations