use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class ProjectionProducerVisitorTest method testSimplePushdownProducer_returnsOnePushdown.
/**
 * Applies a single {@code SimpleSourceWithPushdown} to the pipeline, registers access to
 * {@code field1} and {@code field2} on its output, and checks that the visitor reports exactly
 * one pushdown opportunity, keyed by that source transform, for exactly those two fields.
 */
@Test
public void testSimplePushdownProducer_returnsOnePushdown() {
  Pipeline pipeline = Pipeline.create();
  PTransform<PBegin, PCollection<Row>> pushdownSource = new SimpleSourceWithPushdown();
  PCollection<Row> sourceOutput = pipeline.apply(pushdownSource);

  // Only two named fields of the source's output are requested.
  FieldAccessDescriptor requestedFields = FieldAccessDescriptor.withFieldNames("field1", "field2");
  Map<PCollection<?>, FieldAccessDescriptor> fieldAccessByOutput =
      ImmutableMap.of(sourceOutput, requestedFields);

  ProjectionProducerVisitor projectionVisitor = new ProjectionProducerVisitor(fieldAccessByOutput);
  pipeline.traverseTopologically(projectionVisitor);
  Map<ProjectionProducer<PTransform<?, ?>>, Map<PCollection<?>, FieldAccessDescriptor>> opportunities =
      projectionVisitor.getPushdownOpportunities();

  // Exactly one producer is reported, and it is the source transform itself.
  Assert.assertEquals(1, opportunities.size());
  Map<PCollection<?>, FieldAccessDescriptor> sourceOpportunities = opportunities.get(pushdownSource);
  Assert.assertNotNull(sourceOpportunities);
  Assert.assertEquals(1, sourceOpportunities.size());

  // The reported access for the source's output covers only the two requested fields.
  FieldAccessDescriptor reportedAccess = sourceOpportunities.get(sourceOutput);
  Assert.assertNotNull(reportedAccess);
  Assert.assertFalse(reportedAccess.getAllFields());
  assertThat(reportedAccess.fieldNamesAccessed(), containsInAnyOrder("field1", "field2"));
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class ProjectionProducerVisitorTest method testProjectionProducerInsideNonProducer_returnsInnerPushdown.
/**
 * Applies a {@code CompositeTransformWithPushdownInside}, registers access to {@code field1} and
 * {@code field2} on its output, and checks that the single reported pushdown opportunity is keyed
 * by the composite's {@code innerT} transform rather than by the composite itself.
 */
@Test
public void testProjectionProducerInsideNonProducer_returnsInnerPushdown() {
  Pipeline pipeline = Pipeline.create();
  CompositeTransformWithPushdownInside composite = new CompositeTransformWithPushdownInside();
  PCollection<Row> compositeOutput = pipeline.apply(composite);

  // Only two named fields of the composite's output are requested.
  FieldAccessDescriptor requestedFields = FieldAccessDescriptor.withFieldNames("field1", "field2");
  Map<PCollection<?>, FieldAccessDescriptor> fieldAccessByOutput =
      ImmutableMap.of(compositeOutput, requestedFields);

  ProjectionProducerVisitor projectionVisitor = new ProjectionProducerVisitor(fieldAccessByOutput);
  pipeline.traverseTopologically(projectionVisitor);
  Map<ProjectionProducer<PTransform<?, ?>>, Map<PCollection<?>, FieldAccessDescriptor>> opportunities =
      projectionVisitor.getPushdownOpportunities();

  // Exactly one producer is reported, and it is looked up via the inner transform.
  Assert.assertEquals(1, opportunities.size());
  Map<PCollection<?>, FieldAccessDescriptor> innerOpportunities = opportunities.get(composite.innerT);
  Assert.assertNotNull(innerOpportunities);
  Assert.assertEquals(1, innerOpportunities.size());

  // The reported access for the output covers only the two requested fields.
  FieldAccessDescriptor reportedAccess = innerOpportunities.get(compositeOutput);
  Assert.assertNotNull(reportedAccess);
  Assert.assertFalse(reportedAccess.getAllFields());
  assertThat(reportedAccess.fieldNamesAccessed(), containsInAnyOrder("field1", "field2"));
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class ProjectionProducerVisitorTest method testFieldAccessAllFields_returnsNoPushdown.
/**
 * Applies a {@code SimpleSource}, registers an all-fields access on its output, and checks that
 * the visitor reports no pushdown opportunities.
 */
@Test
public void testFieldAccessAllFields_returnsNoPushdown() {
  Pipeline pipeline = Pipeline.create();
  PCollection<Row> sourceOutput = pipeline.apply(new SimpleSource());

  // Every field of the output is requested.
  FieldAccessDescriptor everyField = FieldAccessDescriptor.withAllFields();
  Map<PCollection<?>, FieldAccessDescriptor> fieldAccessByOutput =
      ImmutableMap.of(sourceOutput, everyField);

  ProjectionProducerVisitor projectionVisitor = new ProjectionProducerVisitor(fieldAccessByOutput);
  pipeline.traverseTopologically(projectionVisitor);
  Map<ProjectionProducer<PTransform<?, ?>>, Map<PCollection<?>, FieldAccessDescriptor>> opportunities =
      projectionVisitor.getPushdownOpportunities();

  Assert.assertTrue(opportunities.isEmpty());
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class SelectHelpers method selectIntoRow.
/**
 * Selects the fields described by {@code fieldAccessDescriptor} out of a given {@link Row} and
 * appends them to {@code output}.
 *
 * <p>If the descriptor accesses all fields, every value of {@code input} is appended and the
 * method returns. Otherwise the directly accessed fields are appended first (in the order of
 * {@code fieldIdsAccessed()}), followed by each nested selection, which is delegated to {@code
 * selectIntoRowWithQualifiers}.
 *
 * <p>A {@code null} input is tolerated on every path: nulls are emitted in place of the values
 * that would have been read from the row.
 *
 * @param inputSchema schema of {@code input}; supplies the field count when {@code input} is
 *     null and the types of nested fields
 * @param input the row to select from; may be {@code null}
 * @param output builder that receives the selected values
 * @param fieldAccessDescriptor describes which top-level and nested fields to select
 */
private static void selectIntoRow(Schema inputSchema, Row input, Row.Builder output, FieldAccessDescriptor fieldAccessDescriptor) {
  if (fieldAccessDescriptor.getAllFields()) {
    // A null row selects as one null per field of the input schema.
    List<Object> values = (input != null) ? input.getValues() : Collections.nCopies(inputSchema.getFieldCount(), null);
    output.addValues(values);
    return;
  }
  for (int fieldId : fieldAccessDescriptor.fieldIdsAccessed()) {
    // TODO: Once we support specific qualifiers (like array slices), extract them here.
    output.addValue((input != null) ? input.getValue(fieldId) : null);
  }
  Schema outputSchema = output.getSchema();
  for (Map.Entry<FieldDescriptor, FieldAccessDescriptor> nested : fieldAccessDescriptor.getNestedFieldsAccessed().entrySet()) {
    FieldDescriptor field = nested.getKey();
    FieldAccessDescriptor nestedAccess = nested.getValue();
    FieldType nestedInputType = inputSchema.getField(field.getFieldId()).getType();
    // The next output slot determines the type the nested selection must produce.
    FieldType nestedOutputType = outputSchema.getField(output.nextFieldId()).getType();
    // Guard against a null row here as well, consistently with the two branches above; previously
    // this dereferenced input unconditionally and threw a NullPointerException.
    // NOTE(review): assumes selectIntoRowWithQualifiers accepts a null value - confirm.
    Object nestedValue = (input != null) ? input.getValue(field.getFieldId()) : null;
    selectIntoRowWithQualifiers(field.getQualifiers(), 0, nestedValue, output, nestedAccess, nestedInputType, nestedOutputType);
  }
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class SelectHelpers method getOutputSchemaTrackingNullable.
/**
 * Computes the schema that results from applying {@code fieldAccessDescriptor} to {@code
 * inputSchema}, forcing selected fields to be nullable when {@code isNullable} is set.
 *
 * <p>For an all-fields access the input schema is returned as-is, or with every field marked
 * nullable when {@code isNullable} is true. Otherwise one schema is built from the directly
 * accessed fields (with renames applied), one schema is obtained per nested access via {@code
 * getOutputSchemaHelper}, and the {@code union} of all of them is returned.
 *
 * @param inputSchema the schema being selected from
 * @param fieldAccessDescriptor describes which top-level and nested fields are selected
 * @param isNullable whether the selected fields must be marked nullable
 * @return the schema of the selection
 */
private static Schema getOutputSchemaTrackingNullable(Schema inputSchema, FieldAccessDescriptor fieldAccessDescriptor, boolean isNullable) {
  if (fieldAccessDescriptor.getAllFields()) {
    if (!isNullable) {
      return inputSchema;
    }
    // Some parent field in the selector was nullable, so we must mark all of these fields
    // nullable.
    return inputSchema.getFields().stream().map(f -> f.withNullable(true)).collect(Schema.toSchema());
  }

  // First schema: the directly accessed top-level fields.
  Schema.Builder directFields = Schema.builder();
  for (FieldDescriptor accessed : fieldAccessDescriptor.getFieldsAccessed()) {
    Field selected = inputSchema.getField(accessed.getFieldId());
    if (accessed.getFieldRename() != null) {
      selected = selected.withName(accessed.getFieldRename());
    }
    // When isNullable is set, the selected field is marked nullable regardless of how it was
    // declared in the input schema: e.g. when selecting a.b, b must be nullable if a is
    // nullable (even if b was not in the original schema).
    directFields.addField(isNullable ? selected.withNullable(true) : selected);
  }
  List<Schema> parts = Lists.newArrayList();
  parts.add(directFields.build());

  // Remaining schemas: one per nested access.
  for (Map.Entry<FieldDescriptor, FieldAccessDescriptor> entry : fieldAccessDescriptor.getNestedFieldsAccessed().entrySet()) {
    FieldDescriptor parentDescriptor = entry.getKey();
    Field parent = inputSchema.getField(checkNotNull(parentDescriptor.getFieldId()));
    if (parentDescriptor.getFieldRename() != null) {
      parent = parent.withName(parentDescriptor.getFieldRename());
    }
    // Nullability is passed down: set if already set here or if the parent type is nullable.
    boolean nestedNullable = isNullable || parent.getType().getNullable();
    parts.add(getOutputSchemaHelper(parent.getType(), entry.getValue(), parentDescriptor.getQualifiers(), 0, nestedNullable));
  }
  return union(parts);
}
Aggregations