use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class ProjectionPushdownOptimizerTest method testIntermediateProducer.
/**
 * Wires a source, an intermediate transform constructed with a field access descriptor, and a
 * downstream consumer that only reads a subset of the fields, then checks that the optimizer
 * leaves the intermediate transform in the pipeline.
 */
@Test
public void testIntermediateProducer() {
  Pipeline pipeline = Pipeline.create();
  SimpleSource upstream =
      new SimpleSource(FieldAccessDescriptor.withFieldNames("foo", "bar", "baz"));
  IntermediateTransformWithPushdown originalT =
      new IntermediateTransformWithPushdown(
          FieldAccessDescriptor.withFieldNames("foo", "bar", "baz"));

  // The consumer at the end of the chain only needs two of the three fields.
  pipeline
      .apply(upstream)
      .apply(originalT)
      .apply(new FieldAccessTransform(FieldAccessDescriptor.withFieldNames("foo", "bar")));

  // TODO(BEAM-13658) Support pushdown on intermediate transforms.
  // Until then, the optimizer is expected to leave intermediate transforms untouched.
  ProjectionPushdownOptimizer.optimize(pipeline);

  Assert.assertTrue(pipelineHasTransform(pipeline, originalT));
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class ProjectionProducerVisitorTest method testPushdownProducersWithMultipleOutputs_returnsMultiplePushdowns.
/**
 * Applies a source with two tagged outputs, requests a different pair of fields on each output,
 * and checks that the visitor reports a single pushdown opportunity for that source containing
 * one field access descriptor per output.
 */
@Test
public void testPushdownProducersWithMultipleOutputs_returnsMultiplePushdowns() {
  Pipeline pipeline = Pipeline.create();
  PTransform<PBegin, PCollectionTuple> multiOutputSource = new MultipleOutputSourceWithPushdown();
  PCollectionTuple tuple = pipeline.apply(multiOutputSource);
  PCollection<?> firstOutput = tuple.get("output1");
  PCollection<?> secondOutput = tuple.get("output2");

  // Field access requested downstream of each output.
  Map<PCollection<?>, FieldAccessDescriptor> requestedFields =
      ImmutableMap.<PCollection<?>, FieldAccessDescriptor>of(
          firstOutput, FieldAccessDescriptor.withFieldNames("field1", "field2"),
          secondOutput, FieldAccessDescriptor.withFieldNames("field3", "field4"));

  ProjectionProducerVisitor producerVisitor = new ProjectionProducerVisitor(requestedFields);
  pipeline.traverseTopologically(producerVisitor);
  Map<ProjectionProducer<PTransform<?, ?>>, Map<PCollection<?>, FieldAccessDescriptor>>
      opportunities = producerVisitor.getPushdownOpportunities();

  // Exactly one producer (the source), carrying an entry for each of its two outputs.
  Assert.assertEquals(1, opportunities.size());
  Map<PCollection<?>, FieldAccessDescriptor> perOutput = opportunities.get(multiOutputSource);
  Assert.assertNotNull(perOutput);
  Assert.assertEquals(2, perOutput.size());

  FieldAccessDescriptor firstAccess = perOutput.get(firstOutput);
  Assert.assertNotNull(firstAccess);
  Assert.assertFalse(firstAccess.getAllFields());
  assertThat(firstAccess.fieldNamesAccessed(), containsInAnyOrder("field1", "field2"));

  FieldAccessDescriptor secondAccess = perOutput.get(secondOutput);
  Assert.assertNotNull(secondAccess);
  Assert.assertFalse(secondAccess.getAllFields());
  assertThat(secondAccess.fieldNamesAccessed(), containsInAnyOrder("field3", "field4"));
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class FieldAccessVisitorTest method testFieldAccessTwoKnownMainInputs.
/**
 * Applies two consumers to the same three-field row collection, each reading a different single
 * field, and checks that the visitor records both accessed fields for that collection without
 * reporting all-field access.
 */
@Test
public void testFieldAccessTwoKnownMainInputs() {
  Schema rowSchema =
      Schema.of(
          Field.of("field1", FieldType.STRING),
          Field.of("field2", FieldType.STRING),
          Field.of("field3", FieldType.STRING));
  Row onlyRow = Row.withSchema(rowSchema).addValues("foo", "bar", "baz").build();

  Pipeline pipeline = Pipeline.create();
  PCollection<Row> rows = pipeline.apply(Create.of(onlyRow)).setRowSchema(rowSchema);

  // Two independent consumers of the same collection, each touching a different field.
  rows.apply(new FieldAccessTransform(FieldAccessDescriptor.withFieldNames("field1")));
  rows.apply(new FieldAccessTransform(FieldAccessDescriptor.withFieldNames("field2")));

  FieldAccessVisitor visitor = new FieldAccessVisitor();
  pipeline.traverseTopologically(visitor);

  FieldAccessDescriptor recorded = visitor.getPCollectionFieldAccess().get(rows);
  assertFalse(recorded.getAllFields());
  assertThat(recorded.fieldNamesAccessed(), containsInAnyOrder("field1", "field2"));
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class FieldAccessVisitorTest method testFieldAccessKnownMainInput.
/**
 * Applies a single consumer that reads one field of a two-field row collection and checks that
 * the visitor records exactly that field for the collection without reporting all-field access.
 */
@Test
public void testFieldAccessKnownMainInput() {
  Schema rowSchema =
      Schema.of(Field.of("field1", FieldType.STRING), Field.of("field2", FieldType.STRING));
  Row onlyRow = Row.withSchema(rowSchema).addValues("foo", "bar").build();

  Pipeline pipeline = Pipeline.create();
  PCollection<Row> rows = pipeline.apply(Create.of(onlyRow)).setRowSchema(rowSchema);
  rows.apply(new FieldAccessTransform(FieldAccessDescriptor.withFieldNames("field1")));

  FieldAccessVisitor visitor = new FieldAccessVisitor();
  pipeline.traverseTopologically(visitor);

  FieldAccessDescriptor recorded = visitor.getPCollectionFieldAccess().get(rows);
  assertFalse(recorded.getAllFields());
  assertThat(recorded.fieldNamesAccessed(), containsInAnyOrder("field1"));
}
use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.
the class ParDo method getDoFnSchemaInformation.
/**
 * Extract information on how the DoFn uses schemas. In particular, if the schema of an element
 * parameter does not match the input PCollection's schema, convert.
 *
 * <p>For every schema element parameter of the DoFn's process-element method, the field access
 * descriptor for that parameter is recorded, and either a select-from-schema conversion or a
 * primitive unboxing step is registered. If the method additionally takes a process context or a
 * plain element parameter, access to all fields is recorded.
 *
 * @param fn the DoFn whose process-element signature is inspected
 * @param input the PCollection the DoFn is applied to
 * @return the accumulated schema information for {@code fn}
 * @throws IllegalArgumentException if {@code fn} declares schema element parameters but {@code
 *     input} has no schema, or if a parameter can be neither converted from nor unboxed out of
 *     the selected schema
 */
@Internal
public static DoFnSchemaInformation getDoFnSchemaInformation(DoFn<?, ?> fn, PCollection<?> input) {
  DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
  DoFnSignature.ProcessElementMethod processElementMethod = signature.processElement();
  // Schema element parameters can only be resolved against an input that carries a schema.
  if (!processElementMethod.getSchemaElementParameters().isEmpty() && !input.hasSchema()) {
    // Keep the historical message prefix; only add the separator that was missing before the
    // PCollection description.
    throw new IllegalArgumentException("Type of @Element must match the DoFn type: " + input);
  }

  SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry();
  DoFnSchemaInformation doFnSchemaInformation = DoFnSchemaInformation.create();
  for (SchemaElementParameter parameter : processElementMethod.getSchemaElementParameters()) {
    TypeDescriptor<?> elementT = parameter.elementT();
    FieldAccessDescriptor accessDescriptor =
        getFieldAccessDescriptorFromParameter(
            parameter.fieldAccessString(),
            input.getSchema(),
            signature.fieldAccessDeclarations(),
            fn);
    doFnSchemaInformation = doFnSchemaInformation.withFieldAccessDescriptor(accessDescriptor);
    Schema selectedSchema = SelectHelpers.getOutputSchema(input.getSchema(), accessDescriptor);
    ConvertHelpers.ConvertedSchemaInformation converted =
        ConvertHelpers.getConvertedSchemaInformation(selectedSchema, elementT, schemaRegistry);
    if (converted.outputSchemaCoder != null) {
      doFnSchemaInformation =
          doFnSchemaInformation.withSelectFromSchemaParameter(
              (SchemaCoder<?>) input.getCoder(),
              accessDescriptor,
              selectedSchema,
              converted.outputSchemaCoder,
              converted.unboxedType != null);
    } else {
      // If the selected schema is a Row containing a single primitive type (which is the output
      // of Select when selecting a primitive), attempt to unbox it and match against the
      // parameter.
      checkArgument(
          converted.unboxedType != null,
          "Selected schema %s can be neither converted nor unboxed to parameter type %s",
          selectedSchema,
          elementT);
      doFnSchemaInformation =
          doFnSchemaInformation.withUnboxPrimitiveParameter(
              (SchemaCoder<?>) input.getCoder(), accessDescriptor, selectedSchema, elementT);
    }
  }

  // A process context or a plain element parameter is treated as access to every field; one
  // such parameter is enough, so stop at the first match.
  for (DoFnSignature.Parameter p : processElementMethod.extraParameters()) {
    if (p instanceof ProcessContextParameter || p instanceof ElementParameter) {
      doFnSchemaInformation =
          doFnSchemaInformation.withFieldAccessDescriptor(FieldAccessDescriptor.withAllFields());
      break;
    }
  }
  return doFnSchemaInformation;
}
Aggregations