Search in sources :

Example 36 with FieldAccessDescriptor

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.

the class ProjectionProducerVisitorTest method testSimplePushdownProducer_returnsOnePushdown.

/**
 * Applies a {@code SimpleSourceWithPushdown} to a pipeline, declares that only {@code field1} and
 * {@code field2} of its output are accessed, and checks that the visitor reports exactly one
 * pushdown opportunity: keyed by the source transform and carrying those two field names.
 */
@Test
public void testSimplePushdownProducer_returnsOnePushdown() {
    Pipeline pipeline = Pipeline.create();
    PTransform<PBegin, PCollection<Row>> source = new SimpleSourceWithPushdown();
    PCollection<Row> sourceOutput = pipeline.apply(source);

    // Only two named fields of the source output are requested.
    FieldAccessDescriptor requestedFields = FieldAccessDescriptor.withFieldNames("field1", "field2");
    Map<PCollection<?>, FieldAccessDescriptor> fieldAccessByOutput = ImmutableMap.of(sourceOutput, requestedFields);

    ProjectionProducerVisitor projectionVisitor = new ProjectionProducerVisitor(fieldAccessByOutput);
    pipeline.traverseTopologically(projectionVisitor);
    Map<ProjectionProducer<PTransform<?, ?>>, Map<PCollection<?>, FieldAccessDescriptor>> opportunities = projectionVisitor.getPushdownOpportunities();

    // Exactly one producer is reported, and it is the applied source itself.
    Assert.assertEquals(1, opportunities.size());
    Map<PCollection<?>, FieldAccessDescriptor> sourceOpportunities = opportunities.get(source);
    Assert.assertNotNull(sourceOpportunities);
    Assert.assertEquals(1, sourceOpportunities.size());

    // The reported access for the source output is the requested subset, not "all fields".
    FieldAccessDescriptor reportedAccess = sourceOpportunities.get(sourceOutput);
    Assert.assertNotNull(reportedAccess);
    Assert.assertFalse(reportedAccess.getAllFields());
    assertThat(reportedAccess.fieldNamesAccessed(), containsInAnyOrder("field1", "field2"));
}
Also used : FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) PBegin(org.apache.beam.sdk.values.PBegin) Pipeline(org.apache.beam.sdk.Pipeline) PCollection(org.apache.beam.sdk.values.PCollection) ProjectionProducer(org.apache.beam.sdk.schemas.ProjectionProducer) Row(org.apache.beam.sdk.values.Row) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) Test(org.junit.Test)

Example 37 with FieldAccessDescriptor

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.

the class ProjectionProducerVisitorTest method testProjectionProducerInsideNonProducer_returnsInnerPushdown.

/**
 * Applies a {@code CompositeTransformWithPushdownInside} to a pipeline, declares that only
 * {@code field1} and {@code field2} of its output are accessed, and checks that the single
 * reported pushdown opportunity is keyed by the composite's {@code innerT} transform rather than
 * by the composite itself.
 */
@Test
public void testProjectionProducerInsideNonProducer_returnsInnerPushdown() {
    Pipeline pipeline = Pipeline.create();
    CompositeTransformWithPushdownInside composite = new CompositeTransformWithPushdownInside();
    PCollection<Row> compositeOutput = pipeline.apply(composite);

    // Only two named fields of the composite's output are requested.
    FieldAccessDescriptor requestedFields = FieldAccessDescriptor.withFieldNames("field1", "field2");
    Map<PCollection<?>, FieldAccessDescriptor> fieldAccessByOutput = ImmutableMap.of(compositeOutput, requestedFields);

    ProjectionProducerVisitor projectionVisitor = new ProjectionProducerVisitor(fieldAccessByOutput);
    pipeline.traverseTopologically(projectionVisitor);
    Map<ProjectionProducer<PTransform<?, ?>>, Map<PCollection<?>, FieldAccessDescriptor>> opportunities = projectionVisitor.getPushdownOpportunities();

    // Exactly one producer is reported, and it is looked up by the inner transform.
    Assert.assertEquals(1, opportunities.size());
    Map<PCollection<?>, FieldAccessDescriptor> innerOpportunities = opportunities.get(composite.innerT);
    Assert.assertNotNull(innerOpportunities);
    Assert.assertEquals(1, innerOpportunities.size());

    // The reported access for the output is the requested subset, not "all fields".
    FieldAccessDescriptor reportedAccess = innerOpportunities.get(compositeOutput);
    Assert.assertNotNull(reportedAccess);
    Assert.assertFalse(reportedAccess.getAllFields());
    assertThat(reportedAccess.fieldNamesAccessed(), containsInAnyOrder("field1", "field2"));
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) ProjectionProducer(org.apache.beam.sdk.schemas.ProjectionProducer) Row(org.apache.beam.sdk.values.Row) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 38 with FieldAccessDescriptor

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.

the class ProjectionProducerVisitorTest method testFieldAccessAllFields_returnsNoPushdown.

/**
 * Applies a {@code SimpleSource} to a pipeline, declares that all fields of its output are
 * accessed, and checks that the visitor reports no pushdown opportunities.
 */
@Test
public void testFieldAccessAllFields_returnsNoPushdown() {
    Pipeline pipeline = Pipeline.create();
    PCollection<Row> sourceOutput = pipeline.apply(new SimpleSource());

    // Every field of the output is requested.
    FieldAccessDescriptor everyField = FieldAccessDescriptor.withAllFields();
    Map<PCollection<?>, FieldAccessDescriptor> fieldAccessByOutput = ImmutableMap.of(sourceOutput, everyField);

    ProjectionProducerVisitor projectionVisitor = new ProjectionProducerVisitor(fieldAccessByOutput);
    pipeline.traverseTopologically(projectionVisitor);
    Map<ProjectionProducer<PTransform<?, ?>>, Map<PCollection<?>, FieldAccessDescriptor>> opportunities = projectionVisitor.getPushdownOpportunities();

    Assert.assertTrue(opportunities.isEmpty());
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) ProjectionProducer(org.apache.beam.sdk.schemas.ProjectionProducer) Row(org.apache.beam.sdk.values.Row) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 39 with FieldAccessDescriptor

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.

the class SelectHelpers method selectIntoRow.

/**
 * Select out of a given {@link Row} object.
 *
 * <p>Values are appended to {@code output} in three steps:
 *
 * <ol>
 *   <li>If the descriptor selects all fields, every value of {@code input} is copied and the
 *       method returns.
 *   <li>Otherwise each directly accessed field id is copied in the order returned by
 *       {@code fieldIdsAccessed()}.
 *   <li>Each nested access is then delegated to {@code selectIntoRowWithQualifiers}.
 * </ol>
 *
 * <p>A {@code null} input row is treated as a row whose fields are all {@code null}; this is
 * applied consistently in all three steps.
 *
 * @param inputSchema schema describing {@code input}
 * @param input row to select from; may be {@code null}
 * @param output builder receiving the selected values
 * @param fieldAccessDescriptor which fields (direct and nested) to select
 */
private static void selectIntoRow(Schema inputSchema, Row input, Row.Builder output, FieldAccessDescriptor fieldAccessDescriptor) {
    if (fieldAccessDescriptor.getAllFields()) {
        // A null row contributes one null per field of the input schema.
        List<Object> values = (input != null) ? input.getValues() : Collections.nCopies(inputSchema.getFieldCount(), null);
        output.addValues(values);
        return;
    }
    for (int fieldId : fieldAccessDescriptor.fieldIdsAccessed()) {
        // TODO: Once we support specific qualifiers (like array slices), extract them here.
        output.addValue((input != null) ? input.getValue(fieldId) : null);
    }
    Schema outputSchema = output.getSchema();
    for (Map.Entry<FieldDescriptor, FieldAccessDescriptor> nested : fieldAccessDescriptor.getNestedFieldsAccessed().entrySet()) {
        FieldDescriptor field = nested.getKey();
        FieldAccessDescriptor nestedAccess = nested.getValue();
        FieldType nestedInputType = inputSchema.getField(field.getFieldId()).getType();
        FieldType nestedOutputType = outputSchema.getField(output.nextFieldId()).getType();
        // Guard against a null input row here as well; previously this dereferenced input
        // unconditionally and threw a NullPointerException even though the branches above accept null.
        // NOTE(review): assumes selectIntoRowWithQualifiers tolerates a null value - confirm.
        Object nestedValue = (input != null) ? input.getValue(field.getFieldId()) : null;
        selectIntoRowWithQualifiers(field.getQualifiers(), 0, nestedValue, output, nestedAccess, nestedInputType, nestedOutputType);
    }
}
Also used : FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) Schema(org.apache.beam.sdk.schemas.Schema) Map(java.util.Map) FieldDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType)

Example 40 with FieldAccessDescriptor

use of org.apache.beam.sdk.schemas.FieldAccessDescriptor in project beam by apache.

the class SelectHelpers method getOutputSchemaTrackingNullable.

/**
 * Builds the output schema for applying {@code fieldAccessDescriptor} to {@code inputSchema}.
 *
 * <p>If the descriptor selects all fields, the input schema is returned, with every field marked
 * nullable when {@code isNullable} is set. Otherwise one schema is built from the directly
 * accessed fields, one schema is obtained from {@code getOutputSchemaHelper} per nested access,
 * and the results are combined with {@code union}.
 *
 * @param inputSchema schema being selected from
 * @param fieldAccessDescriptor which fields (direct and nested) are selected
 * @param isNullable whether some parent field in the selector was nullable
 * @return the combined output schema
 */
private static Schema getOutputSchemaTrackingNullable(Schema inputSchema, FieldAccessDescriptor fieldAccessDescriptor, boolean isNullable) {
    if (fieldAccessDescriptor.getAllFields()) {
        if (!isNullable) {
            return inputSchema;
        }
        // Some parent field in the selector was nullable, so we must mark all of these fields
        // nullable.
        return inputSchema.getFields().stream().map(selected -> selected.withNullable(true)).collect(Schema.toSchema());
    }

    // Schema of the directly accessed fields, with renames applied.
    Schema.Builder directFields = Schema.builder();
    for (FieldDescriptor accessed : fieldAccessDescriptor.getFieldsAccessed()) {
        Field selected = inputSchema.getField(accessed.getFieldId());
        if (accessed.getFieldRename() != null) {
            selected = selected.withName(accessed.getFieldRename());
        }
        // A selected field must be nullable when a parent in the selector is nullable, even if the
        // field itself was not nullable in the original schema.
        directFields.addField(isNullable ? selected.withNullable(true) : selected);
    }

    List<Schema> partialSchemas = Lists.newArrayList();
    partialSchemas.add(directFields.build());

    // One schema per nested access; nullability propagates from this level or from the field's type.
    for (Map.Entry<FieldDescriptor, FieldAccessDescriptor> entry : fieldAccessDescriptor.getNestedFieldsAccessed().entrySet()) {
        FieldDescriptor nestedDescriptor = entry.getKey();
        Field nestedField = inputSchema.getField(checkNotNull(nestedDescriptor.getFieldId()));
        if (nestedDescriptor.getFieldRename() != null) {
            nestedField = nestedField.withName(nestedDescriptor.getFieldRename());
        }
        boolean nestedNullable = isNullable || nestedField.getType().getNullable();
        partialSchemas.add(getOutputSchemaHelper(nestedField.getType(), entry.getValue(), nestedDescriptor.getQualifiers(), 0, nestedNullable));
    }
    return union(partialSchemas);
}
Also used : Field(org.apache.beam.sdk.schemas.Schema.Field) FieldAccessDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor) Schema(org.apache.beam.sdk.schemas.Schema) Map(java.util.Map) FieldDescriptor(org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor)

Aggregations

FieldAccessDescriptor (org.apache.beam.sdk.schemas.FieldAccessDescriptor)65 Test (org.junit.Test)49 Row (org.apache.beam.sdk.values.Row)47 Schema (org.apache.beam.sdk.schemas.Schema)42 PCollection (org.apache.beam.sdk.values.PCollection)16 Map (java.util.Map)12 Pipeline (org.apache.beam.sdk.Pipeline)11 ProjectionProducer (org.apache.beam.sdk.schemas.ProjectionProducer)9 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)8 ParDo (org.apache.beam.sdk.transforms.ParDo)5 DoFnSchemaInformation (org.apache.beam.sdk.transforms.DoFnSchemaInformation)4 PBegin (org.apache.beam.sdk.values.PBegin)4 DefaultTableFilter (org.apache.beam.sdk.extensions.sql.meta.DefaultTableFilter)3 FieldType (org.apache.beam.sdk.schemas.Schema.FieldType)3 PTransform (org.apache.beam.sdk.transforms.PTransform)3 List (java.util.List)2 Collectors (java.util.stream.Collectors)2 AutoValueSchema (org.apache.beam.sdk.schemas.AutoValueSchema)2 FieldDescriptor (org.apache.beam.sdk.schemas.FieldAccessDescriptor.FieldDescriptor)2 Field (org.apache.beam.sdk.schemas.Schema.Field)2