Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class BigQueryIOTest, method testWriteWithDynamicTables.
public void testWriteWithDynamicTables(boolean streaming) throws Exception {
  BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
  bqOptions.setProject("defaultproject");
  bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
  FakeDatasetService datasetService = new FakeDatasetService();
  datasetService.createDataset("project-id", "dataset-id", "", "");
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withDatasetService(datasetService)
          .withJobService(new FakeJobService());
  List<Integer> inserts = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    inserts.add(i);
  }
  // Create a windowing strategy that puts the input into five different windows depending on
  // record value.
  WindowFn<Integer, PartitionedGlobalWindow> windowFn =
      new PartitionedGlobalWindows(
          new SerializableFunction<Integer, String>() {
            @Override
            public String apply(Integer i) {
              return Integer.toString(i % 5);
            }
          });
  final Map<Integer, TableDestination> targetTables = Maps.newHashMap();
  Map<String, String> schemas = Maps.newHashMap();
  for (int i = 0; i < 5; i++) {
    TableDestination destination =
        new TableDestination("project-id:dataset-id" + ".table-id-" + i, "");
    targetTables.put(i, destination);
    // Make sure each target table has its own custom schema.
    schemas.put(
        destination.getTableSpec(),
        BigQueryHelpers.toJsonString(
            new TableSchema()
                .setFields(
                    ImmutableList.of(
                        new TableFieldSchema().setName("name").setType("STRING"),
                        new TableFieldSchema().setName("number").setType("INTEGER"),
                        new TableFieldSchema().setName("custom_" + i).setType("STRING")))));
  }
  SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> tableFunction =
      new SerializableFunction<ValueInSingleWindow<Integer>, TableDestination>() {
        @Override
        public TableDestination apply(ValueInSingleWindow<Integer> input) {
          PartitionedGlobalWindow window = (PartitionedGlobalWindow) input.getWindow();
          // Check that we can access the element as well here and that it matches the window.
          checkArgument(
              window.value.equals(Integer.toString(input.getValue() % 5)), "Incorrect element");
          return targetTables.get(input.getValue() % 5);
        }
      };
  Pipeline p = TestPipeline.create(bqOptions);
  PCollection<Integer> input = p.apply("CreateSource", Create.of(inserts));
  if (streaming) {
    input = input.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
  }
  PCollectionView<Map<String, String>> schemasView =
      p.apply("CreateSchemaMap", Create.of(schemas))
          .apply("ViewSchemaAsMap", View.<String, String>asMap());
  input
      .apply(Window.<Integer>into(windowFn))
      .apply(
          BigQueryIO.<Integer>write()
              .to(tableFunction)
              .withFormatFunction(
                  new SerializableFunction<Integer, TableRow>() {
                    @Override
                    public TableRow apply(Integer i) {
                      return new TableRow().set("name", "number" + i).set("number", i);
                    }
                  })
              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
              .withSchemaFromView(schemasView)
              .withTestServices(fakeBqServices)
              .withoutValidation());
  p.run();
  for (int i = 0; i < 5; ++i) {
    String tableId = String.format("table-id-%d", i);
    String tableSpec = String.format("project-id:dataset-id.%s", tableId);
    // Verify that the table was created with the correct schema.
    assertThat(
        BigQueryHelpers.toJsonString(
            datasetService
                .getTable(
                    new TableReference()
                        .setProjectId("project-id")
                        .setDatasetId("dataset-id")
                        .setTableId(tableId))
                .getSchema()),
        equalTo(schemas.get(tableSpec)));
    // Verify that the table has the expected contents.
    assertThat(
        datasetService.getAllRows("project-id", "dataset-id", tableId),
        containsInAnyOrder(
            new TableRow().set("name", String.format("number%d", i)).set("number", i),
            new TableRow().set("name", String.format("number%d", i + 5)).set("number", i + 5)));
  }
}
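Because SerializableFunction is a single-method interface that extends Serializable, the anonymous classes above can also be written as lambdas. A minimal sketch of lambda equivalents for the two functions in this test (an illustrative rewrite, not part of the original source):

  // Lambda form of the format function passed to withFormatFunction(...).
  SerializableFunction<Integer, TableRow> formatFn =
      i -> new TableRow().set("name", "number" + i).set("number", i);
  // Lambda form of the dynamic-destination function passed to to(...),
  // omitting the extra window/element consistency check done in the test.
  SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> tableFn =
      input -> targetTables.get(input.getValue() % 5);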
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class BigQueryIOTest, method testTransformingSource.
@Test
public void testTransformingSource() throws Exception {
  int numElements = 10000;
  @SuppressWarnings("deprecation")
  BoundedSource<Long> longSource = CountingSource.upTo(numElements);
  SerializableFunction<Long, String> toStringFn =
      new SerializableFunction<Long, String>() {
        @Override
        public String apply(Long input) {
          return input.toString();
        }
      };
  BoundedSource<String> stringSource =
      new TransformingSource<>(longSource, toStringFn, StringUtf8Coder.of());
  List<String> expected = Lists.newArrayList();
  for (int i = 0; i < numElements; i++) {
    expected.add(String.valueOf(i));
  }
  PipelineOptions options = PipelineOptionsFactory.create();
  Assert.assertThat(
      SourceTestUtils.readFromSource(stringSource, options), CoreMatchers.is(expected));
  SourceTestUtils.assertSplitAtFractionBehavior(
      stringSource, 100, 0.3, ExpectedSplitOutcome.MUST_SUCCEED_AND_BE_CONSISTENT, options);
  SourceTestUtils.assertSourcesEqualReferenceSource(
      stringSource, stringSource.split(100, options), options);
}
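The TransformingSource wrapper applies toStringFn to every element read from the underlying BoundedSource. At the PCollection level the same Long-to-String conversion is usually expressed with MapElements; a hedged sketch, assuming a Pipeline p is available and using GenerateSequence in place of CountingSource:

  // Generate 0..numElements-1 and map each Long to its decimal String form.
  PCollection<String> strings =
      p.apply(GenerateSequence.from(0).to(numElements))
          .apply(MapElements.into(TypeDescriptors.strings()).via((Long x) -> Long.toString(x)));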
Use of org.apache.beam.sdk.transforms.SerializableFunction in project java-docs-samples by GoogleCloudPlatform.
From the class SpannerReadAll, method main.
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);
  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withInstanceId(options.getInstanceId())
          .withDatabaseId(options.getDatabaseId());
  // [START spanner_dataflow_readall]
  PCollection<Struct> allRecords =
      p.apply(SpannerIO.read()
              .withSpannerConfig(spannerConfig)
              .withQuery("SELECT t.table_name FROM information_schema.tables AS t"
                  + " WHERE t.table_catalog = '' AND t.table_schema = ''"))
          .apply(MapElements.into(TypeDescriptor.of(ReadOperation.class))
              .via((SerializableFunction<Struct, ReadOperation>) input -> {
                String tableName = input.getString(0);
                return ReadOperation.create().withQuery("SELECT * FROM " + tableName);
              }))
          .apply(SpannerIO.readAll().withSpannerConfig(spannerConfig));
  // [END spanner_dataflow_readall]
  PCollection<Long> dbEstimatedSize =
      allRecords.apply(EstimateSize.create()).apply(Sum.longsGlobally());
  dbEstimatedSize
      .apply(ToString.elements())
      .apply(TextIO.write().to(options.getOutput()).withoutSharding());
  p.run().waitUntilFinish();
}
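Here the lambda is cast to SerializableFunction to give via(...) an explicit target type. An alternative sketch (hypothetical class name, not part of the sample) uses a SimpleFunction subclass, which also carries the output type so the into(...) hint can be dropped:

  // Hypothetical named equivalent of the cast lambda above.
  static class StructToReadOperationFn extends SimpleFunction<Struct, ReadOperation> {
    @Override
    public ReadOperation apply(Struct input) {
      String tableName = input.getString(0);
      return ReadOperation.create().withQuery("SELECT * FROM " + tableName);
    }
  }
  // Usage: .apply(MapElements.via(new StructToReadOperationFn()))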
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class JdbcIOTest, method testWriteWithoutPreparedStatementWithReadRows.
@Test
public void testWriteWithoutPreparedStatementWithReadRows() throws Exception {
  SerializableFunction<Void, DataSource> dataSourceProvider = ignored -> DATA_SOURCE;
  PCollection<Row> rows =
      pipeline.apply(
          JdbcIO.readRows()
              .withDataSourceProviderFn(dataSourceProvider)
              .withQuery(String.format("select name,id from %s where name = ?", READ_TABLE_NAME))
              .withStatementPreparator(
                  preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
  String writeTableName = DatabaseTestHelper.getTestTableName("UT_WRITE_PS_WITH_READ_ROWS");
  DatabaseTestHelper.createTable(DATA_SOURCE, writeTableName);
  try {
    rows.apply(
        JdbcIO.<Row>write()
            .withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION)
            .withBatchSize(10L)
            .withTable(writeTableName));
    pipeline.run();
  } finally {
    DatabaseTestHelper.deleteTable(DATA_SOURCE, writeTableName);
  }
}
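The provider function above simply returns the pre-built DATA_SOURCE, but since withDataSourceProviderFn takes a SerializableFunction<Void, DataSource>, the DataSource can also be constructed lazily inside the function, which helps when the DataSource itself is not serializable. A minimal sketch; the driver class and JDBC URL are placeholders, not values from the test:

  SerializableFunction<Void, DataSource> lazyProvider =
      ignored -> {
        // Built on demand on each worker (placeholder in-memory Derby settings).
        org.apache.commons.dbcp2.BasicDataSource ds = new org.apache.commons.dbcp2.BasicDataSource();
        ds.setDriverClassName("org.apache.derby.jdbc.EmbeddedDriver");
        ds.setUrl("jdbc:derby:memory:testDB;create=true");
        return ds;
      };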
Use of org.apache.beam.sdk.transforms.SerializableFunction in project beam by apache.
From the class JdbcIOTest, method testReadWithSchema.
@Test
public void testReadWithSchema() {
  SerializableFunction<Void, DataSource> dataSourceProvider = ignored -> DATA_SOURCE;
  JdbcIO.RowMapper<RowWithSchema> rowMapper =
      rs -> new RowWithSchema(rs.getString("NAME"), rs.getInt("ID"));
  pipeline.getSchemaRegistry().registerJavaBean(RowWithSchema.class);
  PCollection<RowWithSchema> rows =
      pipeline.apply(
          JdbcIO.<RowWithSchema>read()
              .withDataSourceProviderFn(dataSourceProvider)
              .withQuery(String.format("select name,id from %s where name = ?", READ_TABLE_NAME))
              .withRowMapper(rowMapper)
              .withCoder(SerializableCoder.of(RowWithSchema.class))
              .withStatementPreparator(
                  preparedStatement -> preparedStatement.setString(1, TestRow.getNameForSeed(1))));
  Schema expectedSchema =
      Schema.of(Schema.Field.of("name", Schema.FieldType.STRING), Schema.Field.of("id", Schema.FieldType.INT32));
  assertEquals(expectedSchema, rows.getSchema());
  PCollection<Row> output = rows.apply(Select.fieldNames("name", "id"));
  PAssert.that(output)
      .containsInAnyOrder(ImmutableList.of(Row.withSchema(expectedSchema).addValues("Testval1", 1).build()));
  pipeline.run();
}
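For comparison, JdbcIO.readRows() (used in the previous example) infers a Beam schema from the JDBC result-set metadata, so the RowMapper, coder, and bean registration can be skipped when generic Row elements are acceptable. A hedged sketch reusing dataSourceProvider and READ_TABLE_NAME from this test:

  // Schema is inferred from the query's result-set metadata; beamRows.getSchema() is set automatically.
  PCollection<Row> beamRows =
      pipeline.apply(
          JdbcIO.readRows()
              .withDataSourceProviderFn(dataSourceProvider)
              .withQuery(String.format("select name,id from %s", READ_TABLE_NAME)));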