Use of org.apache.beam.sdk.transforms.Count in project beam by apache.
From class TestStreamTest, method testEarlyPanesOfWindow:
@Test
@Category({ValidatesRunner.class, UsesTestStreamWithProcessingTime.class})
public void testEarlyPanesOfWindow() {
  TestStream<Long> source =
      TestStream.create(VarLongCoder.of())
          .addElements(TimestampedValue.of(1L, new Instant(1000L)))
          .advanceProcessingTime(Duration.standardMinutes(6)) // Fire early pane
          .addElements(TimestampedValue.of(2L, new Instant(2000L)))
          .advanceProcessingTime(Duration.standardMinutes(6)) // Fire early pane
          .addElements(TimestampedValue.of(3L, new Instant(3000L)))
          .advanceProcessingTime(Duration.standardMinutes(6)) // Fire early pane
          .advanceWatermarkToInfinity();
  PCollection<KV<String, Long>> sum =
      p.apply(source)
          .apply(Window.<Long>into(FixedWindows.of(Duration.standardMinutes(30)))
              .triggering(AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.standardMinutes(5))))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.ZERO))
          .apply(MapElements
              .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.longs()))
              .via(v -> KV.of("key", v)))
          .apply(Sum.longsPerKey());
  IntervalWindow window =
      new IntervalWindow(new Instant(0L), new Instant(0L).plus(Duration.standardMinutes(30)));
  PAssert.that(sum)
      .inEarlyPane(window)
      .satisfies(input -> {
        assertThat(StreamSupport.stream(input.spliterator(), false).count(), is(3L));
        return null;
      })
      .containsInAnyOrder(KV.of("key", 1L), KV.of("key", 3L), KV.of("key", 6L))
      .inOnTimePane(window)
      .satisfies(input -> {
        assertThat(StreamSupport.stream(input.spliterator(), false).count(), is(1L));
        return null;
      })
      .containsInAnyOrder(KV.of("key", 6L));
  p.run().waitUntilFinish();
}
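Note that this test drives the early panes with a processing-time trigger, and the aggregation it applies is Sum.longsPerKey() rather than Count. The windowing configuration is worth reading in isolation, since it explains the asserted values; a minimal sketch of just that piece, assuming the same imports as the test:

// Fixed 30-minute windows; an early pane fires 5 minutes of processing time
// after the first element in the pane, and the on-time pane fires when the
// watermark passes the end of the window. Because panes accumulate, the
// three early sums are 1, 3 (1+2), and 6 (1+2+3), and the on-time pane
// repeats 6, matching the PAssert expectations above.
Window<Long> windowing =
    Window.<Long>into(FixedWindows.of(Duration.standardMinutes(30)))
        .triggering(AfterWatermark.pastEndOfWindow()
            .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()
                .plusDelayOf(Duration.standardMinutes(5))))
        .accumulatingFiredPanes()
        .withAllowedLateness(Duration.ZERO);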
Use of org.apache.beam.sdk.transforms.Count in project beam by apache.
From class CreateStreamTest, method testInStreamingModeCountByKey:
@Test
public void testInStreamingModeCountByKey() throws Exception {
  Instant instant = new Instant(0);
  CreateStream<KV<Integer, Long>> kvSource =
      CreateStream.of(KvCoder.of(VarIntCoder.of(), VarLongCoder.of()), batchDuration())
          .emptyBatch()
          .advanceWatermarkForNextBatch(instant)
          .nextBatch(
              TimestampedValue.of(KV.of(1, 100L), instant.plus(Duration.standardSeconds(3L))),
              TimestampedValue.of(KV.of(1, 300L), instant.plus(Duration.standardSeconds(4L))))
          .advanceWatermarkForNextBatch(instant.plus(Duration.standardSeconds(7L)))
          .nextBatch(
              TimestampedValue.of(KV.of(1, 400L), instant.plus(Duration.standardSeconds(8L))))
          .advanceNextBatchWatermarkToInfinity();
  PCollection<KV<Integer, Long>> output =
      p.apply("create kv Source", kvSource)
          .apply("window input",
              Window.<KV<Integer, Long>>into(FixedWindows.of(Duration.standardSeconds(3L)))
                  .withAllowedLateness(Duration.ZERO))
          .apply(Count.perKey());
  PAssert.that("Wrong count value ", output)
      .satisfies((SerializableFunction<Iterable<KV<Integer, Long>>, Void>) input -> {
        for (KV<Integer, Long> element : input) {
          if (element.getKey() == 1) {
            Long countValue = element.getValue();
            assertNotEquals("Count Value is 0 !!!", 0L, countValue.longValue());
          } else {
            fail("Unknown key in the output PCollection");
          }
        }
        return null;
      });
  p.run();
}
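Count.perKey() maps a PCollection<KV<K, V>> to one KV<K, Long> per key, counting elements regardless of their values. Stripped of the streaming harness, the same transform behaves as in this minimal sketch (bounded input, values illustrative; p is a TestPipeline as in the tests above):

// Count.perKey() counts elements per key; the Long values are ignored.
PCollection<KV<Integer, Long>> counts =
    p.apply(Create.of(KV.of(1, 100L), KV.of(1, 300L), KV.of(1, 400L)))
        .apply(Count.perKey());
PAssert.that(counts).containsInAnyOrder(KV.of(1, 3L));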
Use of org.apache.beam.sdk.transforms.Count in project beam by apache.
From class JdbcIOTest, method testCustomFluentBackOffConfiguration:
@Test
public void testCustomFluentBackOffConfiguration() throws Exception {
  String tableName = DatabaseTestHelper.getTestTableName("UT_FLUENT_BACKOFF");
  DatabaseTestHelper.createTable(DATA_SOURCE, tableName);
  // lock table
  Connection connection = DATA_SOURCE.getConnection();
  Statement lockStatement = connection.createStatement();
  lockStatement.execute("ALTER TABLE " + tableName + " LOCKSIZE TABLE");
  lockStatement.execute("LOCK TABLE " + tableName + " IN EXCLUSIVE MODE");
  // start a first transaction
  connection.setAutoCommit(false);
  PreparedStatement insertStatement =
      connection.prepareStatement("insert into " + tableName + " values(?, ?)");
  insertStatement.setInt(1, 1);
  insertStatement.setString(2, "TEST");
  insertStatement.execute();
  pipeline
      .apply(Create.of(Collections.singletonList(KV.of(1, "TEST"))))
      .apply(JdbcIO.<KV<Integer, String>>write()
          .withDataSourceConfiguration(DATA_SOURCE_CONFIGURATION)
          .withStatement(String.format("insert into %s values(?, ?)", tableName))
          .withRetryStrategy((JdbcIO.RetryStrategy) e -> "40XL1".equals(e.getSQLState()))
          .withRetryConfiguration(
              JdbcIO.RetryConfiguration.create(2, null, Duration.standardSeconds(1)))
          .withPreparedStatementSetter((element, statement) -> {
            statement.setInt(1, element.getKey());
            statement.setString(2, element.getValue());
          }));
  PipelineExecutionException exception =
      assertThrows(PipelineExecutionException.class, () -> pipeline.run().waitUntilFinish());
  // Finally commit the original connection, now that the pipeline has failed due to deadlock.
  connection.commit();
  assertThat(
      exception.getMessage(),
      containsString(
          "java.sql.BatchUpdateException: A lock could not be obtained within the time requested"));
  // Verify that the pipeline retried the write twice, but encountered a deadlock every time.
  expectedLogs.verifyLogRecords(new TypeSafeMatcher<Iterable<LogRecord>>() {
    @Override
    public void describeTo(Description description) {}

    @Override
    protected boolean matchesSafely(Iterable<LogRecord> logRecords) {
      int count = 0;
      for (LogRecord logRecord : logRecords) {
        if (logRecord.getMessage().contains("Deadlock detected, retrying")) {
          count += 1;
        }
      }
      // Max retries will be 2 + the original deadlock error.
      return count == 3;
    }
  });
  // Since the pipeline was unable to write, only the row from insertStatement was written.
  assertRowCount(DATA_SOURCE, tableName, 1);
}
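The back-off knobs are easiest to read when pulled out of the fluent chain. A small sketch of just those pieces (the argument roles of maximum attempts, maximum cumulative back-off, and initial back-off are inferred from the call above and the "2 + the original deadlock error" comment, so treat them as an assumption rather than documented fact):

// A sketch of the retry pieces above, extracted into locals. The
// RetryConfiguration arguments are read here as: max attempts (2),
// max cumulative back-off (null, unused here), initial back-off (1s).
JdbcIO.RetryConfiguration retryConfig =
    JdbcIO.RetryConfiguration.create(2, null, Duration.standardSeconds(1));
// Retry only on Derby's lock-timeout SQLState "40XL1", which surfaces as
// "A lock could not be obtained within the time requested".
JdbcIO.RetryStrategy retryOnLockTimeout = e -> "40XL1".equals(e.getSQLState());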
Use of org.apache.beam.sdk.transforms.Count in project beam by apache.
From class SnsIOIT, method testWriteThenRead:
@Test
public void testWriteThenRead() {
  ITOptions opts = env.options();
  int rows = opts.getNumberOfRows();
  // Write test dataset to SNS
  pipelineWrite
      .apply("Generate Sequence", GenerateSequence.from(0).to(rows))
      .apply("Prepare TestRows", ParDo.of(new DeterministicallyConstructTestRowFn()))
      .apply("Write to SNS",
          SnsIO.<TestRow>write()
              .withTopicArn(resources.snsTopic)
              .withPublishRequestBuilder(r -> PublishRequest.builder().message(r.name())));
  // Read test dataset from SQS.
  PCollection<String> output =
      pipelineRead
          .apply("Read from SQS",
              SqsIO.read().withQueueUrl(resources.sqsQueue).withMaxNumRecords(rows))
          .apply("Extract message", MapElements.into(strings()).via(SnsIOIT::extractMessage));
  PAssert.thatSingleton(output.apply("Count All", Count.globally())).isEqualTo((long) rows);
  PAssert.that(output.apply(Combine.globally(new HashingFn()).withoutDefaults()))
      .containsInAnyOrder(getExpectedHashForRowCount(rows));
  pipelineWrite.run();
  pipelineRead.run();
}
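Count.globally() is what makes PAssert.thatSingleton applicable above: it reduces the entire PCollection to a single Long. A minimal sketch of the idiom on its own (bounded input, values illustrative; p is a TestPipeline):

// Count.globally() yields exactly one element: the total element count.
PCollection<Long> total = p.apply(Create.of("a", "b", "c")).apply(Count.globally());
PAssert.thatSingleton(total).isEqualTo(3L);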
Use of org.apache.beam.sdk.transforms.Count in project beam by apache.
From class DynamoDBIOIT, method runRead:
/** Read test dataset from DynamoDB. */
private void runRead() {
  int rows = env.options().getNumberOfRows();
  PCollection<Map<String, AttributeValue>> records =
      pipelineRead
          .apply("Read from DynamoDB",
              DynamoDBIO.read().withScanRequestFn(in -> buildScanRequest()).items())
          .apply("Flatten result", Flatten.iterables());
  PAssert.thatSingleton(records.apply("Count All", Count.globally())).isEqualTo((long) rows);
  PCollection<String> consolidatedHashcode =
      records
          .apply(MapElements.into(strings()).via(record -> record.get(COL_NAME).s()))
          .apply("Hash records", Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(getExpectedHashForRowCount(rows));
  pipelineRead.run().waitUntilFinish();
}
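The examples above exercise Count.perKey() and Count.globally(); the third variant, Count.perElement(), counts occurrences of each distinct element. A minimal sketch, not taken from the Beam tests (inputs illustrative; p is a TestPipeline):

// Count.perElement() emits one KV<T, Long> per distinct input element.
PCollection<KV<String, Long>> occurrences =
    p.apply(Create.of("a", "b", "a")).apply(Count.perElement());
PAssert.that(occurrences).containsInAnyOrder(KV.of("a", 2L), KV.of("b", 1L));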