Examples with PCollection - org.apache.beam.sdk.values.PCollection

Example 21 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class TransformHierarchyTest method visitAfterReplace.

/**
 * Tests that visiting the {@link TransformHierarchy} after replacing nodes does not visit any of
 * the original nodes or inaccessible values but does visit all of the replacement nodes, new
 * inaccessible replacement values, and the original output values.
 */
@Test
public void visitAfterReplace() {
    Node root = hierarchy.getCurrent();
    final SingleOutput<Long, Long> originalParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    });
    GenerateSequence genUpstream = GenerateSequence.from(0);
    PCollection<Long> upstream = pipeline.apply(genUpstream);
    PCollection<Long> output = upstream.apply("Original", originalParDo);
    Node upstreamNode = hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(upstream);
    hierarchy.popNode();
    Node original = hierarchy.pushNode("Original", upstream, originalParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(output);
    hierarchy.popNode();
    final TupleTag<Long> longs = new TupleTag<>();
    final MultiOutput<Long, Long> replacementParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    }).withOutputTags(longs, TupleTagList.empty());
    PTransform<PCollection<Long>, PCollection<Long>> replacementComposite = new PTransform<PCollection<Long>, PCollection<Long>>() {

        @Override
        public PCollection<Long> expand(PCollection<Long> input) {
            return input.apply("Contained", replacementParDo).get(longs);
        }
    };
    PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo);
    Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite);
    Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(replacementOutput);
    hierarchy.popNode();
    hierarchy.setOutput(replacementOutput.get(longs));
    Map<TupleTag<?>, PCollection<?>> expandedReplacementOutput = (Map) replacementOutput.expand();
    Entry<TupleTag<?>, PCollection<?>> replacementLongs = Iterables.getOnlyElement(expandedReplacementOutput.entrySet());
    hierarchy.replaceOutputs(Collections.singletonMap(replacementOutput.get(longs), ReplacementOutput.of(TaggedPValue.ofExpandedValue(output), TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue()))));
    hierarchy.popNode();
    final Set<Node> visitedCompositeNodes = new HashSet<>();
    final Set<Node> visitedPrimitiveNodes = new HashSet<>();
    Set<PValue> visitedValues = hierarchy.visit(new Defaults() {

        @Override
        public CompositeBehavior enterCompositeTransform(Node node) {
            visitedCompositeNodes.add(node);
            return CompositeBehavior.ENTER_TRANSFORM;
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
            visitedPrimitiveNodes.add(node);
        }
    });
    /*
    Final Graph:
    Upstream -> Upstream.out -> Composite -> (ReplacementParDo -> OriginalParDo.out)
    */
    assertThat(visitedCompositeNodes, containsInAnyOrder(root, compositeNode));
    assertThat(visitedPrimitiveNodes, containsInAnyOrder(upstreamNode, replacementParNode));
    assertThat(visitedValues, containsInAnyOrder(upstream, output));
}

Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PTransform(org.apache.beam.sdk.transforms.PTransform) HashSet(java.util.HashSet) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) PValue(org.apache.beam.sdk.values.PValue) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) Defaults(org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults) Map(java.util.Map) Test(org.junit.Test)

Example 22 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class JdbcIOTest method testReadRowsWithNumericFieldsWithExcessPrecision.

@Test
public void testReadRowsWithNumericFieldsWithExcessPrecision() {
    PCollection<Row> rows = pipeline.apply(JdbcIO.readRows().withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION).withQuery(String.format("SELECT CAST(1 AS NUMERIC(10, 2)) AS T1 FROM %s WHERE name = ?", READ_TABLE_NAME)).withStatementPreparator(preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
    Schema expectedSchema = Schema.of(Schema.Field.of("T1", FieldType.logicalType(FixedPrecisionNumeric.of(NUMERIC.getName(), 10, 2)).withNullable(false)));
    assertEquals(expectedSchema, rows.getSchema());
    PCollection<Row> output = rows.apply(Select.fieldNames("T1"));
    PAssert.that(output).containsInAnyOrder(ImmutableList.of(Row.withSchema(expectedSchema).addValues(BigDecimal.valueOf(1).setScale(2, RoundingMode.HALF_UP)).build()));
    pipeline.run();
}

Also used : Count(org.apache.beam.sdk.transforms.Count) ExpectedLogs(org.apache.beam.sdk.testing.ExpectedLogs) Arrays(java.util.Arrays) PipelineExecutionException(org.apache.beam.sdk.Pipeline.PipelineExecutionException) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) Connection(java.sql.Connection) Time(java.sql.Time) Matchers.not(org.hamcrest.Matchers.not) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Array(java.sql.Array) PoolableDataSourceProvider(org.apache.beam.sdk.io.jdbc.JdbcIO.PoolableDataSourceProvider) BigDecimal(java.math.BigDecimal) Matchers.closeTo(org.hamcrest.Matchers.closeTo) Create(org.apache.beam.sdk.transforms.Create) Wait(org.apache.beam.sdk.transforms.Wait) PoolingDataSource(org.apache.commons.dbcp2.PoolingDataSource) RoundingMode(java.math.RoundingMode) KvCoder(org.apache.beam.sdk.coders.KvCoder) NULL(java.sql.JDBCType.NULL) TimeZone(java.util.TimeZone) Timestamp(java.sql.Timestamp) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) UUID(java.util.UUID) PreparedStatement(java.sql.PreparedStatement) LogRecord(java.util.logging.LogRecord) TypeSafeMatcher(org.hamcrest.TypeSafeMatcher) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) Matchers.any(org.mockito.Matchers.any) List(java.util.List) PartitioningFn(org.apache.beam.sdk.io.jdbc.JdbcUtil.PartitioningFn) ParDo(org.apache.beam.sdk.transforms.ParDo) SerializableUtils(org.apache.beam.sdk.util.SerializableUtils) Assert.assertFalse(org.junit.Assert.assertFalse) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) ISOChronology(org.joda.time.chrono.ISOChronology) TestStream(org.apache.beam.sdk.testing.TestStream) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) DatabaseTestHelper.assertRowCount(org.apache.beam.sdk.io.common.DatabaseTestHelper.assertRowCount) Matchers.containsString(org.hamcrest.Matchers.containsString) Mockito.mock(org.mockito.Mockito.mock) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) BeforeClass(org.junit.BeforeClass) Assert.assertThrows(org.junit.Assert.assertThrows) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) NUMERIC(java.sql.JDBCType.NUMERIC) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) FixedPrecisionNumeric(org.apache.beam.sdk.io.jdbc.LogicalTypes.FixedPrecisionNumeric) Assert.assertSame(org.junit.Assert.assertSame) JDBCType(java.sql.JDBCType) SQLException(java.sql.SQLException) Calendar(java.util.Calendar) Charset(java.nio.charset.Charset) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) DataSource(javax.sql.DataSource) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Row(org.apache.beam.sdk.values.Row) ExpectedException(org.junit.rules.ExpectedException) Select(org.apache.beam.sdk.schemas.transforms.Select) Description(org.hamcrest.Description) PAssert(org.apache.beam.sdk.testing.PAssert) TestRow(org.apache.beam.sdk.io.common.TestRow) DataSourceConfiguration(org.apache.beam.sdk.io.jdbc.JdbcIO.DataSourceConfiguration) DateTime(org.joda.time.DateTime) Assert.assertTrue(org.junit.Assert.assertTrue) Mockito.times(org.mockito.Mockito.times) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Mockito.verify(org.mockito.Mockito.verify) Date(java.sql.Date) LocalDate(org.joda.time.LocalDate) Rule(org.junit.Rule) Instant(org.joda.time.Instant) Statement(java.sql.Statement) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) Collections(java.util.Collections) DatabaseTestHelper(org.apache.beam.sdk.io.common.DatabaseTestHelper) Assert.assertEquals(org.junit.Assert.assertEquals) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) Schema(org.apache.beam.sdk.schemas.Schema) Row(org.apache.beam.sdk.values.Row) TestRow(org.apache.beam.sdk.io.common.TestRow) Test(org.junit.Test)

Example 23 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class JdbcIOTest method testWriteWithoutPreparedStatementWithReadRows.

@Test
public void testWriteWithoutPreparedStatementWithReadRows() throws Exception {
    SerializableFunction<Void, DataSource> dataSourceProvider = ignored -> DATA_SOURCE;
    PCollection<Row> rows = pipeline.apply(JdbcIO.readRows().withDataSourceProviderFn(dataSourceProvider).withQuery(String.format("select name,id from %s where name = ?", READ_TABLE_NAME)).withStatementPreparator(preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
    String writeTableName = DatabaseTestHelper.getTestTableName("UT_WRITE_PS_WITH_READ_ROWS");
    DatabaseTestHelper.createTable(DATA_SOURCE, writeTableName);
    try {
        rows.apply(JdbcIO.<Row>write().withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION).withBatchSize(10L).withTable(writeTableName));
        pipeline.run();
    } finally {
        DatabaseTestHelper.deleteTable(DATA_SOURCE, writeTableName);
    }
}

Also used : Count(org.apache.beam.sdk.transforms.Count) ExpectedLogs(org.apache.beam.sdk.testing.ExpectedLogs) Arrays(java.util.Arrays) PipelineExecutionException(org.apache.beam.sdk.Pipeline.PipelineExecutionException) SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) Connection(java.sql.Connection) Time(java.sql.Time) Matchers.not(org.hamcrest.Matchers.not) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Array(java.sql.Array) PoolableDataSourceProvider(org.apache.beam.sdk.io.jdbc.JdbcIO.PoolableDataSourceProvider) BigDecimal(java.math.BigDecimal) Matchers.closeTo(org.hamcrest.Matchers.closeTo) Create(org.apache.beam.sdk.transforms.Create) Wait(org.apache.beam.sdk.transforms.Wait) PoolingDataSource(org.apache.commons.dbcp2.PoolingDataSource) RoundingMode(java.math.RoundingMode) KvCoder(org.apache.beam.sdk.coders.KvCoder) NULL(java.sql.JDBCType.NULL) TimeZone(java.util.TimeZone) Timestamp(java.sql.Timestamp) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) UUID(java.util.UUID) PreparedStatement(java.sql.PreparedStatement) LogRecord(java.util.logging.LogRecord) TypeSafeMatcher(org.hamcrest.TypeSafeMatcher) StandardCharsets(java.nio.charset.StandardCharsets) Serializable(java.io.Serializable) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) Matchers.any(org.mockito.Matchers.any) List(java.util.List) PartitioningFn(org.apache.beam.sdk.io.jdbc.JdbcUtil.PartitioningFn) ParDo(org.apache.beam.sdk.transforms.ParDo) SerializableUtils(org.apache.beam.sdk.util.SerializableUtils) Assert.assertFalse(org.junit.Assert.assertFalse) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) ISOChronology(org.joda.time.chrono.ISOChronology) TestStream(org.apache.beam.sdk.testing.TestStream) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) DatabaseTestHelper.assertRowCount(org.apache.beam.sdk.io.common.DatabaseTestHelper.assertRowCount) Matchers.containsString(org.hamcrest.Matchers.containsString) Mockito.mock(org.mockito.Mockito.mock) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) BeforeClass(org.junit.BeforeClass) Assert.assertThrows(org.junit.Assert.assertThrows) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) NUMERIC(java.sql.JDBCType.NUMERIC) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) FixedPrecisionNumeric(org.apache.beam.sdk.io.jdbc.LogicalTypes.FixedPrecisionNumeric) Assert.assertSame(org.junit.Assert.assertSame) JDBCType(java.sql.JDBCType) SQLException(java.sql.SQLException) Calendar(java.util.Calendar) Charset(java.nio.charset.Charset) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) DataSource(javax.sql.DataSource) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Row(org.apache.beam.sdk.values.Row) ExpectedException(org.junit.rules.ExpectedException) Select(org.apache.beam.sdk.schemas.transforms.Select) Description(org.hamcrest.Description) PAssert(org.apache.beam.sdk.testing.PAssert) TestRow(org.apache.beam.sdk.io.common.TestRow) DataSourceConfiguration(org.apache.beam.sdk.io.jdbc.JdbcIO.DataSourceConfiguration) DateTime(org.joda.time.DateTime) Assert.assertTrue(org.junit.Assert.assertTrue) Mockito.times(org.mockito.Mockito.times) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) Mockito.verify(org.mockito.Mockito.verify) Date(java.sql.Date) LocalDate(org.joda.time.LocalDate) Rule(org.junit.Rule) Instant(org.joda.time.Instant) Statement(java.sql.Statement) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) Collections(java.util.Collections) DatabaseTestHelper(org.apache.beam.sdk.io.common.DatabaseTestHelper) Assert.assertEquals(org.junit.Assert.assertEquals) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) Row(org.apache.beam.sdk.values.Row) TestRow(org.apache.beam.sdk.io.common.TestRow) Matchers.containsString(org.hamcrest.Matchers.containsString) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) PoolingDataSource(org.apache.commons.dbcp2.PoolingDataSource) DataSource(javax.sql.DataSource) Test(org.junit.Test)

Example 24 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class JdbcIOTest method testReadWithSchema.

@Test
public void testReadWithSchema() {
    SerializableFunction<Void, DataSource> dataSourceProvider = ignored -> DATA_SOURCE;
    JdbcIO.RowMapper<RowWithSchema> rowMapper = rs -> new RowWithSchema(rs.getString("NAME"), rs.getInt("ID"));
    pipeline.getSchemaRegistry().registerJavaBean(RowWithSchema.class);
    PCollection<RowWithSchema> rows = pipeline.apply(JdbcIO.<RowWithSchema>read().withDataSourceProviderFn(dataSourceProvider).withQuery(String.format("select name,id from %s where name = ?", READ_TABLE_NAME)).withRowMapper(rowMapper).withCoder(SerializableCoder.of(RowWithSchema.class)).withStatementPreparator(preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
    Schema expectedSchema = Schema.of(Schema.Field.of("name", Schema.FieldType.STRING), Schema.Field.of("id", Schema.FieldType.INT32));
    assertEquals(expectedSchema, rows.getSchema());
    PCollection<Row> output = rows.apply(Select.fieldNames("name", "id"));
    PAssert.that(output).containsInAnyOrder(ImmutableList.of(Row.withSchema(expectedSchema).addValues("Testval1", 1).build()));
    pipeline.run();
}

Example 25 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class JdbcIOTest method testReadRowsWithDataSourceConfiguration.

@Test
public void testReadRowsWithDataSourceConfiguration() {
    PCollection<Row> rows = pipeline.apply(JdbcIO.readRows().withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION).withQuery(String.format("select name,id from %s where name = ?", READ_TABLE_NAME)).withStatementPreparator(preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
    Schema expectedSchema = Schema.of(Schema.Field.of("NAME", LogicalTypes.variableLengthString(JDBCType.VARCHAR, 500)).withNullable(true), Schema.Field.of("ID", Schema.FieldType.INT32).withNullable(true));
    assertEquals(expectedSchema, rows.getSchema());
    PCollection<Row> output = rows.apply(Select.fieldNames("NAME", "ID"));
    PAssert.that(output).containsInAnyOrder(ImmutableList.of(Row.withSchema(expectedSchema).addValues("Testval1", 1).build()));
    pipeline.run();
}

Aggregations

PCollection (org.apache.beam.sdk.values.PCollection)198 Test (org.junit.Test)133 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)61 KV (org.apache.beam.sdk.values.KV)61 Map (java.util.Map)59 List (java.util.List)58 Rule (org.junit.Rule)57 RunWith (org.junit.runner.RunWith)54 PAssert (org.apache.beam.sdk.testing.PAssert)52 Instant (org.joda.time.Instant)46 Duration (org.joda.time.Duration)45 JUnit4 (org.junit.runners.JUnit4)45 ParDo (org.apache.beam.sdk.transforms.ParDo)44 TupleTag (org.apache.beam.sdk.values.TupleTag)42 Pipeline (org.apache.beam.sdk.Pipeline)41 Create (org.apache.beam.sdk.transforms.Create)41 ArrayList (java.util.ArrayList)40 Serializable (java.io.Serializable)39 PTransform (org.apache.beam.sdk.transforms.PTransform)37 Row (org.apache.beam.sdk.values.Row)37