Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
From the class BeamCalcRelTest, method testNoFieldAccess:
@Test
public void testNoFieldAccess() throws IllegalAccessException {
  String sql = "SELECT 1 FROM ORDER_DETAILS_BOUNDED";
  PCollection<Row> rows = compilePipeline(sql, pipeline);
  final NodeGetter nodeGetter = new NodeGetter(rows);
  pipeline.traverseTopologically(nodeGetter);
  ParDo.MultiOutput<Row, Row> pardo =
      (ParDo.MultiOutput<Row, Row>) nodeGetter.producer.getTransform();
  PCollection<Row> input =
      (PCollection<Row>) Iterables.getOnlyElement(nodeGetter.producer.getInputs().values());
  DoFnSchemaInformation info = ParDo.getDoFnSchemaInformation(pardo.getFn(), input);
  FieldAccessDescriptor fieldAccess = info.getFieldAccessDescriptor();
  Assert.assertFalse(fieldAccess.getAllFields());
  Assert.assertTrue(fieldAccess.getFieldsAccessed().isEmpty());
  Assert.assertTrue(fieldAccess.getNestedFieldsAccessed().isEmpty());
  pipeline.run().waitUntilFinish();
}
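The NodeGetter used in this test (and the next one) is not shown here. Below is a minimal sketch of such a visitor, assuming it does nothing more than record the TransformHierarchy.Node that produced the target PCollection; the real class in the Beam repository may differ in detail. It needs org.apache.beam.sdk.Pipeline, org.apache.beam.sdk.runners.TransformHierarchy, and org.apache.beam.sdk.values.PValue.

// Sketch: records the node that produced a given PValue during traverseTopologically().
private static class NodeGetter extends Pipeline.PipelineVisitor.Defaults {

  private final PValue target;
  // Filled in while the pipeline is traversed; read afterwards as nodeGetter.producer.
  private TransformHierarchy.Node producer;

  NodeGetter(PValue target) {
    this.target = target;
  }

  @Override
  public void visitValue(PValue value, TransformHierarchy.Node producer) {
    if (value == this.target) {
      this.producer = producer;
    }
  }
}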
Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
From the class BeamZetaSqlCalcRelTest, method testSingleFieldAccess:
@Test
public void testSingleFieldAccess() throws IllegalAccessException {
  String sql = "SELECT Key FROM KeyValue";
  PCollection<Row> rows = compile(sql);
  final NodeGetter nodeGetter = new NodeGetter(rows);
  pipeline.traverseTopologically(nodeGetter);
  ParDo.MultiOutput<Row, Row> pardo =
      (ParDo.MultiOutput<Row, Row>) nodeGetter.producer.getTransform();
  PCollection<Row> input =
      (PCollection<Row>) Iterables.getOnlyElement(nodeGetter.producer.getInputs().values());
  DoFnSchemaInformation info = ParDo.getDoFnSchemaInformation(pardo.getFn(), input);
  FieldAccessDescriptor fieldAccess = info.getFieldAccessDescriptor();
  Assert.assertTrue(fieldAccess.referencesSingleField());
  Assert.assertEquals("Key", Iterables.getOnlyElement(fieldAccess.fieldNamesAccessed()));
  pipeline.run().waitUntilFinish();
}
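The single-field result above comes from the DoFn declaring which schema fields it reads. The sketch below shows the general @FieldAccess pattern that makes ParDo.getDoFnSchemaInformation report a restricted FieldAccessDescriptor; it is an illustration of the mechanism, not the BeamZetaSqlCalcRel implementation, and KeyOnlyFn is a made-up name.

// Sketch: a DoFn that declares it only reads the "Key" field of its input Row.
// Needs org.apache.beam.sdk.transforms.DoFn, org.apache.beam.sdk.schemas.FieldAccessDescriptor,
// and org.apache.beam.sdk.values.Row.
private static class KeyOnlyFn extends DoFn<Row, Row> {

  @FieldAccess("keyField")
  final FieldAccessDescriptor keyField = FieldAccessDescriptor.withFieldNames("Key");

  @ProcessElement
  public void process(@FieldAccess("keyField") Row row, OutputReceiver<Row> out) {
    out.output(row);
  }
}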
Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
From the class PubsubMessageToRowTest, method testSendsFlatRowInvalidToDLQ:
@Test
public void testSendsFlatRowInvalidToDLQ() {
  Schema messageSchema =
      Schema.builder()
          .addDateTimeField("event_timestamp")
          .addInt32Field("id")
          .addStringField("name")
          .build();
  PCollectionTuple outputs =
      pipeline
          .apply("create",
              Create.timestamped(
                  message(1, map("attr1", "val1"), "{ \"invalid1\" : \"sdfsd\" }"),
                  message(2, map("attr2", "val2"), "{ \"invalid2"),
                  message(3, map("attr", "val"), "{ \"id\" : 3, \"name\" : \"foo\" }"),
                  message(4, map("bttr", "vbl"), "{ \"name\" : \"baz\", \"id\" : 5 }")))
          .apply("convert",
              PubsubMessageToRow.builder()
                  .messageSchema(messageSchema)
                  .useDlq(true)
                  .useFlatSchema(true)
                  .serializerProvider(JSON_SERIALIZER_PROVIDER)
                  .build());
  PCollection<Row> rows = outputs.get(MAIN_TAG);
  PCollection<PubsubMessage> dlqMessages = outputs.get(DLQ_TAG);
  PAssert.that(dlqMessages)
      .satisfies(messages -> {
        assertEquals(2, size(messages));
        assertEquals(
            ImmutableSet.of(map("attr1", "val1"), map("attr2", "val2")),
            convertToSet(messages, PubsubMessage::getAttributeMap));
        assertEquals(
            ImmutableSet.of("{ \"invalid1\" : \"sdfsd\" }", "{ \"invalid2"),
            convertToSet(messages, m -> new String(m.getPayload(), UTF_8)));
        return null;
      });
  PAssert.that(rows)
      .containsInAnyOrder(
          Row.withSchema(messageSchema).addValues(ts(3), /* map("attr", "val"), */ 3, "foo").build(),
          Row.withSchema(messageSchema).addValues(ts(4), /* map("bttr", "vbl"), */ 5, "baz").build());
  pipeline.run();
}
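The helpers ts, map, message, and convertToSet are defined elsewhere in the test class. Hypothetical versions are sketched below only to show the shapes the snippet relies on (in particular, ts is assumed to turn a second offset into a joda-time Instant); the actual helpers may differ. The sketch uses org.joda.time.Instant, Guava's ImmutableMap, the Beam PubsubMessage and TimestampedValue classes, and java.util / java.util.stream.

// Hypothetical helper sketches; the real test class defines its own equivalents.
private static Instant ts(long seconds) {
  return new Instant(seconds * 1000L);
}

private static Map<String, String> map(String key, String value) {
  return ImmutableMap.of(key, value);
}

private static TimestampedValue<PubsubMessage> message(
    long timestampSeconds, Map<String, String> attributes, String payload) {
  return TimestampedValue.of(
      new PubsubMessage(payload.getBytes(UTF_8), attributes), ts(timestampSeconds));
}

private static <V> Set<V> convertToSet(
    Iterable<PubsubMessage> messages, Function<PubsubMessage, V> toValue) {
  return StreamSupport.stream(messages.spliterator(), false)
      .map(toValue)
      .collect(Collectors.toSet());
}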
Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
From the class PubsubReadIT, method testReadPublicData:
@Test
public void testReadPublicData() throws Exception {
  // The pipeline will never terminate on its own
  pipeline.getOptions().as(TestPipelineOptions.class).setBlockOnRun(false);
  PCollection<String> messages =
      pipeline.apply(
          PubsubIO.readStrings()
              .fromTopic("projects/pubsub-public-data/topics/taxirides-realtime"));
  messages.apply(
      "waitForAnyMessage", signal.signalSuccessWhen(messages.getCoder(), anyMessages -> true));
  Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5));
  pipeline.apply(signal.signalStart());
  PipelineResult job = pipeline.run();
  start.get();
  signal.waitForSuccess(Duration.standardMinutes(5));
  // A runner may not support cancel
  try {
    job.cancel();
  } catch (UnsupportedOperationException exc) {
    // noop
  }
}
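The signal and pipeline objects are JUnit rules on the test class; a typical declaration is sketched below, under the assumption that the test follows the standard TestPubsubSignal/TestPipeline rule pattern from the Beam GCP Pub/Sub test utilities.

// Sketch of the test rules this snippet relies on (the actual setup may pass options).
@Rule public transient TestPubsubSignal signal = TestPubsubSignal.create();
@Rule public transient TestPipeline pipeline = TestPipeline.create();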
Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
From the class StreamingSourceMetricsTest, method testUnboundedSourceMetrics:
@Test
@Category(StreamingTest.class)
public void testUnboundedSourceMetrics() {
  final long minElements = 1000;
  // Use a GenerateSequence for the UnboundedSequence, but push the watermark to infinity at
  // minElements to let the test pipeline cleanly shut it down. Shutdown will occur shortly
  // afterwards, but at least minElements will be reported in the metrics.
  PCollection<Long> pc =
      pipeline.apply(
          GenerateSequence.from(1)
              .withRate(minElements / 10, Duration.millis(500L))
              .withTimestampFn(
                  t -> t < minElements ? Instant.now() : BoundedWindow.TIMESTAMP_MAX_VALUE));
  assertThat(pc.isBounded(), is(PCollection.IsBounded.UNBOUNDED));
  PipelineResult pipelineResult = pipeline.run();
  MetricQueryResults metrics =
      pipelineResult
          .metrics()
          .queryMetrics(
              MetricsFilter.builder()
                  .addNameFilter(
                      MetricNameFilter.named(ELEMENTS_READ.getNamespace(), ELEMENTS_READ.getName()))
                  .build());
  assertThat(
      metrics.getCounters(),
      hasItem(
          metricsResult(
              ELEMENTS_READ.getNamespace(),
              ELEMENTS_READ.getName(),
              "GenerateSequence/Read(UnboundedCountingSource)",
              greaterThanOrEqualTo(minElements),
              false)));
}
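ELEMENTS_READ is the metric name queried above. One way it could be defined is sketched below, assuming it refers to the SDK's standard source metric; metricsResult is taken to be a Hamcrest matcher helper for MetricResult values.

// Sketch: the standard "elements read" source metric name used in the query above.
// Needs org.apache.beam.sdk.metrics.MetricName and org.apache.beam.sdk.metrics.SourceMetrics.
private static final MetricName ELEMENTS_READ = SourceMetrics.elementsRead().getName();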