Search in sources :

Example 6 with Count

Use of org.apache.beam.sdk.transforms.Count in the Apache Beam project.

From the class DynamoDBIOIT, method runRead.

/**
 * Reads the test dataset back from DynamoDB and verifies it.
 *
 * <p>The scan results are flattened into individual items, after which two checks are registered:
 * the total item count must equal the configured number of rows, and the combined hash of the
 * {@code COL_NAME} string attribute of every item must equal the expected hash for that row count.
 * The read pipeline is then run and waited on until it finishes.
 */
private void runRead() {
    int expectedRows = env.options().getNumberOfRows();

    PCollection<Map<String, AttributeValue>> items =
        pipelineRead
            .apply(
                "Read from DynamoDB",
                DynamoDBIO.read()
                    .withAwsClientsProvider(clientProvider())
                    .withScanRequestFn(ignored -> buildScanRequest())
                    .items())
            .apply("Flatten result", Flatten.iterables());

    // Check 1: the number of items read matches the number of rows expected.
    PCollection<Long> itemCount = items.apply("Count All", Count.globally());
    PAssert.thatSingleton(itemCount).isEqualTo((long) expectedRows);

    // Check 2: a single hash over the COL_NAME value of every item matches the expected hash.
    PCollection<String> combinedHash =
        items
            .apply(MapElements.into(strings()).via(item -> item.get(COL_NAME).getS()))
            .apply("Hash records", Combine.globally(new HashingFn()).withoutDefaults());
    PAssert.that(combinedHash).containsInAnyOrder(getExpectedHashForRowCount(expectedRows));

    pipelineRead.run().waitUntilFinish();
}
Also used : Count(org.apache.beam.sdk.transforms.Count) KV(org.apache.beam.sdk.values.KV) PutRequest(com.amazonaws.services.dynamodbv2.model.PutRequest) AttributeDefinition(com.amazonaws.services.dynamodbv2.model.AttributeDefinition) KeySchemaElement(com.amazonaws.services.dynamodbv2.model.KeySchemaElement) Default(org.apache.beam.sdk.options.Default) Combine(org.apache.beam.sdk.transforms.Combine) KeyType(com.amazonaws.services.dynamodbv2.model.KeyType) RunWith(org.junit.runner.RunWith) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) WriteRequest(com.amazonaws.services.dynamodbv2.model.WriteRequest) Regions(com.amazonaws.regions.Regions) Description(org.apache.beam.sdk.options.Description) TableStatus(com.amazonaws.services.dynamodbv2.model.TableStatus) AttributeValue(com.amazonaws.services.dynamodbv2.model.AttributeValue) Map(java.util.Map) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) TypeDescriptors.strings(org.apache.beam.sdk.values.TypeDescriptors.strings) ITEnvironment(org.apache.beam.sdk.io.aws.ITEnvironment) TestRow.getExpectedHashForRowCount(org.apache.beam.sdk.io.common.TestRow.getExpectedHashForRowCount) ClassRule(org.junit.ClassRule) AWSCredentials(com.amazonaws.auth.AWSCredentials) Flatten(org.apache.beam.sdk.transforms.Flatten) MapElements(org.apache.beam.sdk.transforms.MapElements) HashingFn(org.apache.beam.sdk.io.common.HashingFn) DeterministicallyConstructTestRowFn(org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn) PAssert(org.apache.beam.sdk.testing.PAssert) TestRow(org.apache.beam.sdk.io.common.TestRow) ScanRequest(com.amazonaws.services.dynamodbv2.model.ScanRequest) AmazonDynamoDBClientBuilder(com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) AmazonDynamoDB(com.amazonaws.services.dynamodbv2.AmazonDynamoDB) 
PCollection(org.apache.beam.sdk.values.PCollection) CreateTableRequest(com.amazonaws.services.dynamodbv2.model.CreateTableRequest) ScalarAttributeType(com.amazonaws.services.dynamodbv2.model.ScalarAttributeType) ProvisionedThroughput(com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput) Rule(org.junit.Rule) ExternalResource(org.junit.rules.ExternalResource) ParDo(org.apache.beam.sdk.transforms.ParDo) DYNAMODB(org.testcontainers.containers.localstack.LocalStackContainer.Service.DYNAMODB) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) HashingFn(org.apache.beam.sdk.io.common.HashingFn)

Example 7 with Count

Use of org.apache.beam.sdk.transforms.Count in the Apache Beam project.

From the class MongoDBGridFSIOTest, method testReadWithParser.

/**
 * Reads the "mapBucket" GridFS bucket through a custom parser and checks the parsed output.
 *
 * <p>The parser reads each file line by line as UTF-8, splits the trimmed line on tabs into a
 * long timestamp, a name and an int score, and emits {@code KV(name, score)} stamped with that
 * timestamp. The test asserts that 50100 elements are produced in total and that the per-key
 * maximum score equals 101 for every key.
 */
@Test
public void testReadWithParser() {
    PCollection<KV<String, Integer>> scores =
        pipeline.apply(
            MongoDbGridFSIO.read()
                .withUri("mongodb://localhost:" + port)
                .withDatabase(DATABASE)
                .withBucket("mapBucket")
                .<KV<String, Integer>>withParser(
                    (file, emitter) -> {
                        try (final BufferedReader lines =
                            new BufferedReader(
                                new InputStreamReader(file.getInputStream(), StandardCharsets.UTF_8))) {
                            // One record per line; stop at end of stream.
                            for (String row = lines.readLine(); row != null; row = lines.readLine()) {
                                try (Scanner fields = new Scanner(row.trim())) {
                                    fields.useDelimiter("\\t");
                                    long timestamp = fields.nextLong();
                                    String name = fields.next();
                                    int score = fields.nextInt();
                                    emitter.output(KV.of(name, score), new Instant(timestamp));
                                }
                            }
                        }
                    })
                .withSkew(Duration.millis(3610000L))
                .withCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())));

    PCollection<Long> total = scores.apply("Count All", Count.globally());
    PAssert.thatSingleton(total).isEqualTo(50100L);

    PCollection<KV<String, Integer>> maxPerKey = scores.apply("Max PerElement", Max.integersPerKey());
    PAssert.that(maxPerKey)
        .satisfies(
            maxima -> {
                for (KV<String, Integer> entry : maxima) {
                    assertEquals(101, entry.getValue().longValue());
                }
                return null;
            });

    pipeline.run();
}
Also used : Count(org.apache.beam.sdk.transforms.Count) LoggerFactory(org.slf4j.LoggerFactory) GridFSInputFile(com.mongodb.gridfs.GridFSInputFile) Scanner(java.util.Scanner) Random(java.util.Random) ByteArrayInputStream(java.io.ByteArrayInputStream) Create(org.apache.beam.sdk.transforms.Create) IMongodConfig(de.flapdoodle.embed.mongo.config.IMongodConfig) ClassRule(org.junit.ClassRule) KvCoder(org.apache.beam.sdk.coders.KvCoder) AfterClass(org.junit.AfterClass) GridFSDBFile(com.mongodb.gridfs.GridFSDBFile) StandardCharsets(java.nio.charset.StandardCharsets) GridFS(com.mongodb.gridfs.GridFS) List(java.util.List) Max(org.apache.beam.sdk.transforms.Max) Network(de.flapdoodle.embed.process.runtime.Network) DB(com.mongodb.DB) NetworkTestHelper(org.apache.beam.sdk.io.common.NetworkTestHelper) DataInputStream(java.io.DataInputStream) KV(org.apache.beam.sdk.values.KV) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BeforeClass(org.junit.BeforeClass) Net(de.flapdoodle.embed.mongo.config.Net) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) MongodExecutable(de.flapdoodle.embed.mongo.MongodExecutable) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) MongodConfigBuilder(de.flapdoodle.embed.mongo.config.MongodConfigBuilder) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Version(de.flapdoodle.embed.mongo.distribution.Version) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) OutputStreamWriter(java.io.OutputStreamWriter) Storage(de.flapdoodle.embed.mongo.config.Storage) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) OutputStream(java.io.OutputStream) SourceTestUtils(org.apache.beam.sdk.testing.SourceTestUtils) Logger(org.slf4j.Logger) PAssert(org.apache.beam.sdk.testing.PAssert) MongoCmdOptionsBuilder(de.flapdoodle.embed.mongo.config.MongoCmdOptionsBuilder) Assert.assertTrue(org.junit.Assert.assertTrue) 
BoundedGridFSSource(org.apache.beam.sdk.io.mongodb.MongoDbGridFSIO.Read.BoundedGridFSSource) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) InputStreamReader(java.io.InputStreamReader) MongodProcess(de.flapdoodle.embed.mongo.MongodProcess) BoundedSource(org.apache.beam.sdk.io.BoundedSource) Rule(org.junit.Rule) MongoClient(com.mongodb.MongoClient) MongodStarter(de.flapdoodle.embed.mongo.MongodStarter) Instant(org.joda.time.Instant) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) ObjectId(org.bson.types.ObjectId) BufferedReader(java.io.BufferedReader) Assert.assertEquals(org.junit.Assert.assertEquals) TemporaryFolder(org.junit.rules.TemporaryFolder) InputStream(java.io.InputStream) Scanner(java.util.Scanner) InputStreamReader(java.io.InputStreamReader) Instant(org.joda.time.Instant) BufferedReader(java.io.BufferedReader) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Aggregations

Count (org.apache.beam.sdk.transforms.Count)7 PCollection (org.apache.beam.sdk.values.PCollection)7 Rule (org.junit.Rule)7 Test (org.junit.Test)7 ParDo (org.apache.beam.sdk.transforms.ParDo)6 KV (org.apache.beam.sdk.values.KV)6 PAssert (org.apache.beam.sdk.testing.PAssert)5 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)5 Combine (org.apache.beam.sdk.transforms.Combine)5 RunWith (org.junit.runner.RunWith)5 JUnit4 (org.junit.runners.JUnit4)5 Serializable (java.io.Serializable)4 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)4 VarIntCoder (org.apache.beam.sdk.coders.VarIntCoder)4 TestRow (org.apache.beam.sdk.io.common.TestRow)4 Flatten (org.apache.beam.sdk.transforms.Flatten)4 Duration (org.joda.time.Duration)4 Instant (org.joda.time.Instant)4 ClassRule (org.junit.ClassRule)4 GenerateSequence (org.apache.beam.sdk.io.GenerateSequence)3