Search in sources :

Example 1 with TestRow

use of org.apache.beam.sdk.io.common.TestRow in project beam by apache.

Source: class DynamoDBIOIT, method runWrite.

/**
 * Writes the generated test dataset to DynamoDB and blocks until the write pipeline finishes.
 */
private void runWrite() {
    int rows = env.options().getNumberOfRows();
    // Deterministically generate the test rows so the read side can verify them by hash.
    PCollection<TestRow> testRows =
        pipelineWrite
            .apply("Generate Sequence", GenerateSequence.from(0).to(rows))
            .apply("Prepare TestRows", ParDo.of(new DeterministicallyConstructTestRowFn()));
    testRows.apply(
        "Write to DynamoDB",
        DynamoDBIO.<TestRow>write()
            .withAwsClientsProvider(clientProvider())
            .withWriteRequestMapperFn(r -> buildWriteRequest(r)));
    pipelineWrite.run().waitUntilFinish();
}
Also used : Count(org.apache.beam.sdk.transforms.Count) KV(org.apache.beam.sdk.values.KV) PutRequest(com.amazonaws.services.dynamodbv2.model.PutRequest) AttributeDefinition(com.amazonaws.services.dynamodbv2.model.AttributeDefinition) KeySchemaElement(com.amazonaws.services.dynamodbv2.model.KeySchemaElement) Default(org.apache.beam.sdk.options.Default) Combine(org.apache.beam.sdk.transforms.Combine) KeyType(com.amazonaws.services.dynamodbv2.model.KeyType) RunWith(org.junit.runner.RunWith) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) WriteRequest(com.amazonaws.services.dynamodbv2.model.WriteRequest) Regions(com.amazonaws.regions.Regions) Description(org.apache.beam.sdk.options.Description) TableStatus(com.amazonaws.services.dynamodbv2.model.TableStatus) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) TypeDescriptors.strings(org.apache.beam.sdk.values.TypeDescriptors.strings) ITEnvironment(org.apache.beam.sdk.io.aws.ITEnvironment) TestRow.getExpectedHashForRowCount(org.apache.beam.sdk.io.common.TestRow.getExpectedHashForRowCount) ClassRule(org.junit.ClassRule) AWSCredentials(com.amazonaws.auth.AWSCredentials) Flatten(org.apache.beam.sdk.transforms.Flatten) MapElements(org.apache.beam.sdk.transforms.MapElements) HashingFn(org.apache.beam.sdk.io.common.HashingFn) DeterministicallyConstructTestRowFn(org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn) PAssert(org.apache.beam.sdk.testing.PAssert) TestRow(org.apache.beam.sdk.io.common.TestRow) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) AmazonDynamoDBClientBuilder(com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) AmazonDynamoDB(com.amazonaws.services.dynamodbv2.AmazonDynamoDB) 
PCollection(org.apache.beam.sdk.values.PCollection) CreateTableRequest(com.amazonaws.services.dynamodbv2.model.CreateTableRequest) ScalarAttributeType(com.amazonaws.services.dynamodbv2.model.ScalarAttributeType) ProvisionedThroughput(com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput) Rule(org.junit.Rule) ExternalResource(org.junit.rules.ExternalResource) ParDo(org.apache.beam.sdk.transforms.ParDo) DYNAMODB(org.testcontainers.containers.localstack.LocalStackContainer.Service.DYNAMODB) DeterministicallyConstructTestRowFn(org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn)

Example 2 with TestRow

use of org.apache.beam.sdk.io.common.TestRow in project beam by apache.

Source: class DynamoDBIOIT, method runWrite.

/**
 * Writes the generated test dataset to DynamoDB and blocks until the write pipeline finishes.
 */
private void runWrite() {
    int rows = env.options().getNumberOfRows();
    // Deterministically generate the test rows so the read side can verify them by hash.
    PCollection<TestRow> testRows =
        pipelineWrite
            .apply("Generate Sequence", GenerateSequence.from(0).to(rows))
            .apply("Prepare TestRows", ParDo.of(new DeterministicallyConstructTestRowFn()));
    testRows.apply(
        "Write to DynamoDB",
        DynamoDBIO.<TestRow>write().withWriteRequestMapperFn(r -> buildWriteRequest(r)));
    pipelineWrite.run().waitUntilFinish();
}
Also used : Count(org.apache.beam.sdk.transforms.Count) KV(org.apache.beam.sdk.values.KV) Default(org.apache.beam.sdk.options.Default) Combine(org.apache.beam.sdk.transforms.Combine) RunWith(org.junit.runner.RunWith) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Description(org.apache.beam.sdk.options.Description) ProvisionedThroughput(software.amazon.awssdk.services.dynamodb.model.ProvisionedThroughput) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) CreateTableRequest(software.amazon.awssdk.services.dynamodb.model.CreateTableRequest) ScanRequest(software.amazon.awssdk.services.dynamodb.model.ScanRequest) TypeDescriptors.strings(org.apache.beam.sdk.values.TypeDescriptors.strings) WriteRequest(software.amazon.awssdk.services.dynamodb.model.WriteRequest) TestRow.getExpectedHashForRowCount(org.apache.beam.sdk.io.common.TestRow.getExpectedHashForRowCount) PutRequest(software.amazon.awssdk.services.dynamodb.model.PutRequest) ClassRule(org.junit.ClassRule) ScalarAttributeType(software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType) Flatten(org.apache.beam.sdk.transforms.Flatten) MapElements(org.apache.beam.sdk.transforms.MapElements) DynamoDbClient(software.amazon.awssdk.services.dynamodb.DynamoDbClient) HashingFn(org.apache.beam.sdk.io.common.HashingFn) DeterministicallyConstructTestRowFn(org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn) TableStatus(software.amazon.awssdk.services.dynamodb.model.TableStatus) PAssert(org.apache.beam.sdk.testing.PAssert) TestRow(org.apache.beam.sdk.io.common.TestRow) KeyType(software.amazon.awssdk.services.dynamodb.model.KeyType) ITEnvironment(org.apache.beam.sdk.io.aws2.ITEnvironment) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) AttributeDefinition(software.amazon.awssdk.services.dynamodb.model.AttributeDefinition) 
PCollection(org.apache.beam.sdk.values.PCollection) Rule(org.junit.Rule) ExternalResource(org.junit.rules.ExternalResource) KeySchemaElement(software.amazon.awssdk.services.dynamodb.model.KeySchemaElement) ParDo(org.apache.beam.sdk.transforms.ParDo) AttributeValue(software.amazon.awssdk.services.dynamodb.model.AttributeValue) DYNAMODB(org.testcontainers.containers.localstack.LocalStackContainer.Service.DYNAMODB) DeterministicallyConstructTestRowFn(org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn) TestRow(org.apache.beam.sdk.io.common.TestRow)

Example 3 with TestRow

use of org.apache.beam.sdk.io.common.TestRow in project beam by apache.

Source: class StreamingSnowflakeIOIT, method readDataFromStream.

/**
 * Reads every row of the test table back from Snowflake via JDBC.
 *
 * @return the set of {@link TestRow}s currently stored in {@code TABLE}
 * @throws SQLException if obtaining the connection or executing the query fails
 */
private Set<TestRow> readDataFromStream() throws SQLException {
    // try-with-resources guarantees connection, statement, and result set are closed
    // even when the query or row mapping throws; the original only closed them on the
    // happy path and leaked all three on any SQLException.
    try (Connection connection = dc.buildDatasource().getConnection();
        PreparedStatement statement =
            connection.prepareStatement(String.format("SELECT * FROM %s", TABLE));
        ResultSet resultSet = statement.executeQuery()) {
        return resultSetToJavaSet(resultSet);
    }
}
Also used : TestRow(org.apache.beam.sdk.io.common.TestRow) Connection(java.sql.Connection) ResultSet(java.sql.ResultSet) PreparedStatement(java.sql.PreparedStatement)

Example 4 with TestRow

use of org.apache.beam.sdk.io.common.TestRow in project beam by apache.

Source: class SnsIOIT, method testWriteThenRead.

@Test
public void testWriteThenRead() {
    ITOptions opts = env.options();
    int rows = opts.getNumberOfRows();
    // Publish the deterministic test rows to the SNS topic.
    pipelineWrite
        .apply("Generate Sequence", GenerateSequence.from(0).to(rows))
        .apply("Prepare TestRows", ParDo.of(new DeterministicallyConstructTestRowFn()))
        .apply(
            "Write to SNS",
            SnsIO.<TestRow>write()
                .withTopicArn(resources.snsTopic)
                .withPublishRequestBuilder(row -> PublishRequest.builder().message(row.name())));
    // Drain the subscribed SQS queue and pull out the message bodies.
    PCollection<String> messages =
        pipelineRead
            .apply(
                "Read from SQS",
                SqsIO.read().withQueueUrl(resources.sqsQueue).withMaxNumRecords(rows))
            .apply("Extract message", MapElements.into(strings()).via(SnsIOIT::extractMessage));
    // Verify count and content hash match the generated dataset.
    PAssert.thatSingleton(messages.apply("Count All", Count.globally())).isEqualTo((long) rows);
    PAssert.that(messages.apply(Combine.globally(new HashingFn()).withoutDefaults()))
        .containsInAnyOrder(getExpectedHashForRowCount(rows));
    pipelineWrite.run();
    pipelineRead.run();
}
Also used : Count(org.apache.beam.sdk.transforms.Count) Combine(org.apache.beam.sdk.transforms.Combine) RunWith(org.junit.runner.RunWith) PublishRequest(software.amazon.awssdk.services.sns.model.PublishRequest) IOITHelper.executeWithRetry(org.apache.beam.sdk.io.common.IOITHelper.executeWithRetry) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) TypeDescriptors.strings(org.apache.beam.sdk.values.TypeDescriptors.strings) Timeout(org.junit.rules.Timeout) TestRow.getExpectedHashForRowCount(org.apache.beam.sdk.io.common.TestRow.getExpectedHashForRowCount) ClassRule(org.junit.ClassRule) SqsIO(org.apache.beam.sdk.io.aws2.sqs.SqsIO) Service(org.testcontainers.containers.localstack.LocalStackContainer.Service) MapElements(org.apache.beam.sdk.transforms.MapElements) SNS(org.testcontainers.containers.localstack.LocalStackContainer.Service.SNS) HashingFn(org.apache.beam.sdk.io.common.HashingFn) DeterministicallyConstructTestRowFn(org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn) PAssert(org.apache.beam.sdk.testing.PAssert) TestRow(org.apache.beam.sdk.io.common.TestRow) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ITEnvironment(org.apache.beam.sdk.io.aws2.ITEnvironment) SqsClient(software.amazon.awssdk.services.sqs.SqsClient) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Serializable(java.io.Serializable) Rule(org.junit.Rule) ExternalResource(org.junit.rules.ExternalResource) ParDo(org.apache.beam.sdk.transforms.ParDo) SnsClient(software.amazon.awssdk.services.sns.SnsClient) SqsMessage(org.apache.beam.sdk.io.aws2.sqs.SqsMessage) SQS(org.testcontainers.containers.localstack.LocalStackContainer.Service.SQS) DeterministicallyConstructTestRowFn(org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn) 
HashingFn(org.apache.beam.sdk.io.common.HashingFn) Test(org.junit.Test)

Example 5 with TestRow

use of org.apache.beam.sdk.io.common.TestRow in project beam by apache.

Source: class JdbcIOIT, method runRead.

/**
 * Reads the test dataset back from postgres and validates its contents.
 *
 * <p>The validation aims to (1) check that *all* rows are correct, and (2) surface enough
 * detail in assertion failures that obvious mistakes are easy to spot (e.g. every element
 * shares the same flaw, or "only 5 elements were generated").
 *
 * <p>Generating and comparing every expected value would be too expensive, so a hash over all
 * row contents proves the full dataset is present. Hashing alone gives poor failure messages
 * (problems like "every element was the empty string" stay hidden), so in addition the first
 * and last 500 rows are compared by value via containsInAnyOrder. "First/last" is well defined
 * because each row carries a unique id, giving a natural ordering.
 */
private PipelineResult runRead() {
    // Read (name, id) pairs and record the read time metric.
    PCollection<TestRow> readRows =
        pipelineRead
            .apply(
                JdbcIO.<TestRow>read()
                    .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(dataSource))
                    .withQuery(String.format("select name,id from %s;", tableName))
                    .withRowMapper(new JdbcTestHelper.CreateTestRowOfNameAndId()))
            .apply(ParDo.of(new TimeMonitor<>(NAMESPACE, "read_time")));
    // Cheap sanity check: the total row count matches what was written.
    PAssert.thatSingleton(readRows.apply("Count All", Count.globally()))
        .isEqualTo((long) numberOfRows);
    // Hash of all row names must equal the precomputed expected hash for this row count.
    PCollection<String> contentHash =
        readRows
            .apply(ParDo.of(new TestRow.SelectNameFn()))
            .apply("Hash row contents", Combine.globally(new HashingFn()).withoutDefaults());
    PAssert.that(contentHash).containsInAnyOrder(TestRow.getExpectedHashForRowCount(numberOfRows));
    // Spot-check the 500 smallest and 500 largest rows by value for easier debugging.
    PCollection<List<TestRow>> first500 = readRows.apply(Top.smallest(500));
    Iterable<TestRow> expectedFirst500 = TestRow.getExpectedValues(0, 500);
    PAssert.thatSingletonIterable(first500).containsInAnyOrder(expectedFirst500);
    PCollection<List<TestRow>> last500 = readRows.apply(Top.largest(500));
    Iterable<TestRow> expectedLast500 =
        TestRow.getExpectedValues(numberOfRows - 500, numberOfRows);
    PAssert.thatSingletonIterable(last500).containsInAnyOrder(expectedLast500);
    return pipelineRead.run();
}
Also used : TimeMonitor(org.apache.beam.sdk.testutils.metrics.TimeMonitor) TestRow(org.apache.beam.sdk.io.common.TestRow) ArrayList(java.util.ArrayList) List(java.util.List) HashingFn(org.apache.beam.sdk.io.common.HashingFn)

Aggregations

TestRow (org.apache.beam.sdk.io.common.TestRow)7 HashingFn (org.apache.beam.sdk.io.common.HashingFn)5 GenerateSequence (org.apache.beam.sdk.io.GenerateSequence)3 DeterministicallyConstructTestRowFn (org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn)3 TestRow.getExpectedHashForRowCount (org.apache.beam.sdk.io.common.TestRow.getExpectedHashForRowCount)3 PAssert (org.apache.beam.sdk.testing.PAssert)3 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)3 Combine (org.apache.beam.sdk.transforms.Combine)3 Count (org.apache.beam.sdk.transforms.Count)3 MapElements (org.apache.beam.sdk.transforms.MapElements)3 ParDo (org.apache.beam.sdk.transforms.ParDo)3 PCollection (org.apache.beam.sdk.values.PCollection)3 TypeDescriptors.strings (org.apache.beam.sdk.values.TypeDescriptors.strings)3 ClassRule (org.junit.ClassRule)3 Rule (org.junit.Rule)3 Test (org.junit.Test)3 ExternalResource (org.junit.rules.ExternalResource)3 RunWith (org.junit.runner.RunWith)3 JUnit4 (org.junit.runners.JUnit4)3 Map (java.util.Map)2