Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
From the class TranslationContextTest, method testRegisterInputMessageStreams:
@Test
public void testRegisterInputMessageStreams() {
  final PCollection output = mock(PCollection.class);
  List<String> topics = Arrays.asList("stream1", "stream2");
  List inputDescriptors =
      topics.stream()
          .map(topicName -> createSamzaInputDescriptor(topicName, topicName))
          .collect(Collectors.toList());
  // Registering the PCollection against both input descriptors should make a
  // message stream available for it in the translation context.
  translationContext.registerInputMessageStreams(output, inputDescriptors);
  assertNotNull(translationContext.getMessageStream(output));
}
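The createSamzaInputDescriptor helper is defined elsewhere in the test class. Purely as a sketch of what such a helper could look like when built on Samza's generic descriptor API with a pass-through serde; the factory class name, the NoOpSerde choice, and the parameter meanings are assumptions, not the actual Beam test code:

import org.apache.samza.operators.KV;
import org.apache.samza.serializers.KVSerde;
import org.apache.samza.serializers.NoOpSerde;
import org.apache.samza.system.descriptors.GenericInputDescriptor;
import org.apache.samza.system.descriptors.GenericSystemDescriptor;

// Hypothetical helper: builds a Samza input descriptor for the given stream id,
// backed by a generic system descriptor and a no-op key/value serde.
private static GenericInputDescriptor<KV<Object, Object>> createSamzaInputDescriptor(
    String systemName, String streamId) {
  GenericSystemDescriptor system =
      new GenericSystemDescriptor(systemName, "org.example.FakeSystemFactory"); // assumed factory
  return system.getInputDescriptor(streamId, KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
}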
Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
From the class BigQuerySamplesIT, method testTableIO:
@Test
public void testTableIO() throws Exception {
  String table = testName.getMethodName();

  // ===--- Test 1: createTableRow + writeToTable ---===\\
  // The rest of the tests depend on this since this is the one that writes
  // the contents into the BigQuery table, which the other tests then read.
  TableSchema schema = BigQuerySchemaCreate.createSchema();
  PCollection<TableRow> rows =
      writePipeline.apply(Create.of(Arrays.asList(BigQueryTableRowCreate.createTableRow())));
  BigQueryWriteToTable.writeToTable(PROJECT, DATASET, table, schema, rows);
  writePipeline.run().waitUntilFinish();

  // Check that the BigQuery table has the data using the BigQuery Client Library.
  String query = String.format("SELECT * FROM `%s.%s.%s`", PROJECT, DATASET, table);
  List<String> queryResults =
      StreamSupport.stream(
              BIGQUERY.query(QueryJobConfiguration.of(query)).iterateAll().spliterator(), false)
          .flatMap(values -> fieldValueListToStrings(values).stream())
          .collect(Collectors.toList());
  assertEquals(expected, queryResults);

  // ===--- Test 2: readFromTable ---===\\
  readAndCheck(BigQueryReadFromTable.readFromTable(PROJECT, DATASET, table, readTablePipeline));
  readTablePipeline.run().waitUntilFinish();

  // ===--- Test 3: readFromQuery ---===\\
  readAndCheck(BigQueryReadFromQuery.readFromQuery(PROJECT, DATASET, table, readQueryPipeline));
  readQueryPipeline.run().waitUntilFinish();

  // ===--- Test 4: readFromTableWithBigQueryStorageAPI ---===\\
  readAndCheck(
      BigQueryReadFromTableWithBigQueryStorageAPI.readFromTableWithBigQueryStorageAPI(
          PROJECT, DATASET, table, readBQStorageAPIPipeline));
  readBQStorageAPIPipeline.run().waitUntilFinish();
}
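BigQueryWriteToTable, BigQueryReadFromTable, and the other classes above are the Beam documentation snippets this integration test exercises. As a rough sketch of the write side only, assuming writeToTable simply wraps BigQueryIO.writeTableRows(); the dispositions and the method body are illustrative, not the exact snippet source:

import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
import org.apache.beam.sdk.values.PCollection;

// Illustrative sketch: write a PCollection<TableRow> to project:dataset.table,
// creating the table from the given schema if it does not exist yet.
static void writeToTable(
    String project, String dataset, String table, TableSchema schema, PCollection<TableRow> rows) {
  rows.apply(
      "Write to BigQuery",
      BigQueryIO.writeTableRows()
          .to(String.format("%s:%s.%s", project, dataset, table))
          .withSchema(schema)
          .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
          .withWriteDisposition(WriteDisposition.WRITE_TRUNCATE));
}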
Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
From the class ReadSourcePortableTest, method testExecution:
@Test(timeout = 120_000)
public void testExecution() throws Exception {
  PipelineOptions options =
      PipelineOptionsFactory.fromArgs("--experiments=use_deprecated_read").create();
  options.setRunner(CrashingRunner.class);
  options.as(FlinkPipelineOptions.class).setFlinkMaster("[local]");
  options.as(FlinkPipelineOptions.class).setStreaming(isStreaming);
  options.as(FlinkPipelineOptions.class).setParallelism(2);
  options.as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);

  Pipeline p = Pipeline.create(options);
  PCollection<Long> result =
      p.apply(Read.from(new Source(10)))
          .apply(Window.into(FixedWindows.of(Duration.millis(1))));
  PAssert.that(result)
      .containsInAnyOrder(ImmutableList.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L));

  SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReads(p);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  List<RunnerApi.PTransform> readTransforms =
      pipelineProto.getComponents().getTransformsMap().values().stream()
          .filter(
              transform ->
                  transform.getSpec().getUrn().equals(PTransformTranslation.READ_TRANSFORM_URN))
          .collect(Collectors.toList());
  assertThat(readTransforms, not(empty()));

  // execute the pipeline
  JobInvocation jobInvocation =
      FlinkJobInvoker.create(null)
          .createJobInvocation(
              "fakeId",
              "fakeRetrievalToken",
              flinkJobExecutor,
              pipelineProto,
              options.as(FlinkPipelineOptions.class),
              new FlinkPipelineRunner(
                  options.as(FlinkPipelineOptions.class), null, Collections.emptyList()));
  jobInvocation.start();
  while (jobInvocation.getState() != JobState.Enum.DONE) {
    assertThat(jobInvocation.getState(), not(JobState.Enum.FAILED));
    Thread.sleep(100);
  }
}
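The Source class passed to Read.from is a small bounded source defined inside the test; it exists to exercise the deprecated Read translation path. Purely as an illustration of building the same PCollection<Long>, the built-in GenerateSequence transform produces the same ten windowed elements; this substitution is only about constructing the collection and would not exercise the same hand-written source:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.GenerateSequence;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

public class GenerateSequenceSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create();
    // Same ten elements and 1 ms fixed windows as the test above, built with the
    // built-in GenerateSequence transform instead of a hand-written BoundedSource.
    PCollection<Long> result =
        p.apply(GenerateSequence.from(0).to(10))
            .apply(Window.into(FixedWindows.of(Duration.millis(1))));
    PAssert.that(result).containsInAnyOrder(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L);
    p.run().waitUntilFinish();
  }
}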
Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
From the class DirectGroupByKeyOverrideFactoryTest, method getInputSucceeds:
@Test
public void getInputSucceeds() {
  TestPipeline p = TestPipeline.create();
  PCollection<KV<String, Integer>> input =
      p.apply(
          Create.of(KV.of("foo", 1))
              .withCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())));
  PCollection<KV<String, Iterable<Integer>>> grouped = input.apply(GroupByKey.create());
  AppliedPTransform<?, ?, ?> producer = DirectGraphs.getProducer(grouped);
  PTransformReplacement<PCollection<KV<String, Integer>>, PCollection<KV<String, Iterable<Integer>>>>
      replacement = factory.getReplacementTransform((AppliedPTransform) producer);
  // The replacement must report the same PCollection that fed the original GroupByKey.
  assertThat(replacement.getInput(), Matchers.<PCollection<?>>equalTo(input));
}
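The factory field exercised here is declared elsewhere in the test class. For context, this is roughly how a runner puts such an override factory to work; the wiring below is an illustrative sketch using Pipeline.replaceAll and the runners-core-construction PTransformMatchers helper, not code taken from the test:

import java.util.Collections;
import org.apache.beam.runners.core.construction.PTransformMatchers;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.runners.PTransformOverride;
import org.apache.beam.sdk.transforms.GroupByKey;

// Illustrative wiring: ask the pipeline to replace every GroupByKey application
// with the transform produced by the override factory under test.
static void applyGroupByKeyOverride(Pipeline pipeline) {
  pipeline.replaceAll(
      Collections.singletonList(
          PTransformOverride.of(
              PTransformMatchers.classEqualTo(GroupByKey.class),
              new DirectGroupByKeyOverrideFactory<>())));
}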
Use of org.apache.beam.sdk.values.PCollection in project beam by apache.
From the class ReadSourceTranslatorBatch, method translateTransform:
@SuppressWarnings("unchecked")
@Override
public void translateTransform(
    PTransform<PBegin, PCollection<T>> transform, AbstractTranslationContext context) {
  AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> rootTransform =
      (AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>>)
          context.getCurrentTransform();

  BoundedSource<T> source;
  try {
    source = ReadTranslation.boundedSourceFromTransform(rootTransform);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  SparkSession sparkSession = context.getSparkSession();

  String serializedSource = Base64Serializer.serializeUnchecked(source);
  Dataset<Row> rowDataset =
      sparkSession
          .read()
          .format(sourceProviderClass)
          .option(BEAM_SOURCE_OPTION, serializedSource)
          .option(
              DEFAULT_PARALLELISM,
              String.valueOf(context.getSparkSession().sparkContext().defaultParallelism()))
          .option(PIPELINE_OPTIONS, context.getSerializableOptions().toString())
          .load();

  // extract windowedValue from Row
  WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(source.getOutputCoder(), GlobalWindow.Coder.INSTANCE);
  Dataset<WindowedValue<T>> dataset =
      rowDataset.map(
          RowHelpers.extractWindowedValueFromRowMapFunction(windowedValueCoder),
          EncoderHelpers.fromBeamCoder(windowedValueCoder));

  PCollection<T> output = (PCollection<T>) context.getOutput();
  context.putDataset(output, dataset);
}
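On the other end of this hand-off, the source provider named by sourceProviderClass is expected to read the BEAM_SOURCE_OPTION value back and rebuild the source. A minimal sketch of that reverse step, assuming Base64Serializer.deserializeUnchecked is the companion to the serializeUnchecked call above; the surrounding Spark data-source plumbing is omitted:

import org.apache.beam.runners.core.serialization.Base64Serializer;
import org.apache.beam.sdk.io.BoundedSource;

// Illustrative counterpart to the translator above: the provider takes the string
// it was handed under BEAM_SOURCE_OPTION and rebuilds the BoundedSource from it.
@SuppressWarnings("unchecked")
static <T> BoundedSource<T> recoverSource(String serializedSource) {
  return Base64Serializer.deserializeUnchecked(serializedSource, BoundedSource.class);
}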