Example 16 with CloudObject

Use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

The class InMemoryReaderFactoryTest, method createInMemoryCloudSource.

static <T> Source createInMemoryCloudSource(List<T> elements, Long start, Long end, Coder<T> coder) throws Exception {
    List<String> encodedElements = InMemoryReaderTest.encodedElements(elements, coder);
    CloudObject spec = CloudObject.forClassName("InMemorySource");
    addStringList(spec, WorkerPropertyNames.ELEMENTS, encodedElements);
    if (start != null) {
        addLong(spec, WorkerPropertyNames.START_INDEX, start);
    }
    if (end != null) {
        addLong(spec, WorkerPropertyNames.END_INDEX, end);
    }
    Source cloudSource = new Source();
    cloudSource.setSpec(spec);
    cloudSource.setCodec(CloudObjects.asCloudObject(coder, /* sdkComponents= */ null));
    return cloudSource;
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) Source(com.google.api.services.dataflow.model.Source)
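
A minimal usage sketch for this helper, assuming the default ReaderRegistry resolves the "InMemorySource" class name to the in-memory reader factory (as the ReaderFactoryTest example below suggests); the element values and index bounds are illustrative, and imports follow the "Also used" list above:

List<String> elements = Arrays.asList("a", "b", "c");
Source cloudSource = createInMemoryCloudSource(elements, 0L, 3L, StringUtf8Coder.of());
PipelineOptions options = PipelineOptionsFactory.create();
// Build a NativeReader from the cloud Source description, mirroring the registry-based
// creation shown in the ReaderFactoryTest example below.
NativeReader<?> reader =
    ReaderRegistry.defaultRegistry()
        .create(cloudSource, options, BatchModeExecutionContext.forTesting(options, "testStage"), null);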

Example 17 with CloudObject

Use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

The class IntrinsicMapTaskExecutorFactoryTest, method createWriteInstruction.

static ParallelInstruction createWriteInstruction(int producerIndex, int producerOutputNum, String systemName) {
    InstructionInput cloudInput = new InstructionInput();
    cloudInput.setProducerInstructionIndex(producerIndex);
    cloudInput.setOutputNum(producerOutputNum);
    CloudObject spec = CloudObject.forClass(IntrinsicMapTaskExecutorFactoryTest.TestSinkFactory.class);
    com.google.api.services.dataflow.model.Sink cloudSink = new com.google.api.services.dataflow.model.Sink();
    cloudSink.setSpec(spec);
    cloudSink.setCodec(windowedStringCoder);
    WriteInstruction writeInstruction = new WriteInstruction();
    writeInstruction.setInput(cloudInput);
    writeInstruction.setSink(cloudSink);
    ParallelInstruction instruction = new ParallelInstruction();
    instruction.setWrite(writeInstruction);
    instruction.setSystemName(systemName);
    instruction.setOriginalName(systemName + "OriginalName");
    return instruction;
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) Sink(org.apache.beam.runners.dataflow.worker.util.common.worker.Sink) WriteInstruction(com.google.api.services.dataflow.model.WriteInstruction) InstructionInput(com.google.api.services.dataflow.model.InstructionInput)
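
A short hedged sketch of how a test might exercise this helper; the argument values are illustrative, and the accessors used are the standard generated-model getters that mirror the setters above:

ParallelInstruction write = createWriteInstruction(0, 0, "WriteOut");
// The system name is carried over verbatim; the original name gets the suffix appended above.
assertEquals("WriteOut", write.getSystemName());
assertEquals("WriteOutOriginalName", write.getOriginalName());
// The write instruction wraps the sink spec built with CloudObject.forClass(...).
assertNotNull(write.getWrite().getSink().getSpec());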

Example 18 with CloudObject

Use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

The class PubsubSinkTest, method testWriteWith.

private void testWriteWith(String formatFn) throws Exception {
    Windmill.WorkItemCommitRequest.Builder outputBuilder =
        Windmill.WorkItemCommitRequest.newBuilder()
            .setKey(ByteString.copyFromUtf8("key"))
            .setWorkToken(0);
    when(mockContext.getOutputBuilder()).thenReturn(outputBuilder);
    Map<String, Object> spec = new HashMap<>();
    spec.put(PropertyNames.OBJECT_TYPE_NAME, "");
    spec.put(PropertyNames.PUBSUB_TOPIC, "topic");
    spec.put(PropertyNames.PUBSUB_TIMESTAMP_ATTRIBUTE, "ts");
    spec.put(PropertyNames.PUBSUB_ID_ATTRIBUTE, "id");
    if (formatFn != null) {
        spec.put(PropertyNames.PUBSUB_SERIALIZED_ATTRIBUTES_FN, formatFn);
    }
    CloudObject cloudSinkSpec = CloudObject.fromSpec(spec);
    PubsubSink.Factory factory = new PubsubSink.Factory();
    PubsubSink<String> sink =
        (PubsubSink<String>) factory.create(
            cloudSinkSpec,
            WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder()),
            null,
            mockContext,
            null);
    Sink.SinkWriter<WindowedValue<String>> writer = sink.writer();
    assertEquals(2, writer.add(WindowedValue.timestampedValueInGlobalWindow("e0", new Instant(0))));
    assertEquals(2, writer.add(WindowedValue.timestampedValueInGlobalWindow("e1", new Instant(1))));
    assertEquals(2, writer.add(WindowedValue.timestampedValueInGlobalWindow("e2", new Instant(2))));
    writer.close();
    assertEquals(
        Windmill.WorkItemCommitRequest.newBuilder()
            .setKey(ByteString.copyFromUtf8("key"))
            .setWorkToken(0)
            .addPubsubMessages(
                Windmill.PubSubMessageBundle.newBuilder()
                    .setTopic("topic")
                    .setTimestampLabel("ts")
                    .setIdLabel("id")
                    .addMessages(
                        Windmill.Message.newBuilder()
                            .setTimestamp(0)
                            .setData(ByteString.copyFromUtf8("e0")))
                    .addMessages(
                        Windmill.Message.newBuilder()
                            .setTimestamp(1000)
                            .setData(ByteString.copyFromUtf8("e1")))
                    .addMessages(
                        Windmill.Message.newBuilder()
                            .setTimestamp(2000)
                            .setData(ByteString.copyFromUtf8("e2")))
                    .setWithAttributes(formatFn != null))
            .build(),
        outputBuilder.build());
}
Also used : HashMap(java.util.HashMap) Instant(org.joda.time.Instant) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) Sink(org.apache.beam.runners.dataflow.worker.util.common.worker.Sink) WindowedValue(org.apache.beam.sdk.util.WindowedValue)
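
The helper above is parameterized on the optional serialized attributes function; a minimal sketch of the two entry points a test class might expose (the test names and the function string are assumptions, not the project's actual tests):

@Test
public void testWriteBasic() throws Exception {
    // No format function: messages are written without attributes.
    testWriteWith(null);
}

@Test
public void testWriteWithAttributes() throws Exception {
    // Any non-null serialized function flips setWithAttributes(true) in the commit request.
    testWriteWith("serialized-attributes-fn");  // hypothetical serialized function payload
}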

Example 19 with CloudObject

Use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

The class ReaderFactoryTest, method testCreateReader.

@Test
public void testCreateReader() throws Exception {
    CloudObject spec = CloudObject.forClass(TestReaderFactory.class);
    Source cloudSource = new Source();
    cloudSource.setSpec(spec);
    cloudSource.setCodec(CloudObjects.asCloudObject(BigEndianIntegerCoder.of(), /* sdkComponents= */ null));
    PipelineOptions options = PipelineOptionsFactory.create();
    ReaderRegistry registry = ReaderRegistry.defaultRegistry().register(TestReaderFactory.class.getName(), new TestReaderFactory());
    NativeReader<?> reader = registry.create(cloudSource, PipelineOptionsFactory.create(), BatchModeExecutionContext.forTesting(options, "testStage"), null);
    assertThat(reader, new IsInstanceOf(TestReader.class));
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) IsInstanceOf(org.hamcrest.core.IsInstanceOf) Source(com.google.api.services.dataflow.model.Source) Test(org.junit.Test)
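
The registry lookup above keys off the class name stored in the spec; a small sketch, assuming CloudObject.getClassName() returns the name recorded by forClass/forClassName:

CloudObject specByClass = CloudObject.forClass(TestReaderFactory.class);
CloudObject specByName = CloudObject.forClassName(TestReaderFactory.class.getName());
// Both factory methods record the same fully qualified class name, which is what
// ReaderRegistry.register(...) matched against in the test above.
assertEquals(TestReaderFactory.class.getName(), specByClass.getClassName());
assertEquals(specByName.getClassName(), specByClass.getClassName());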

Example 20 with CloudObject

Use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

The class ShuffleSinkFactoryTest, method runTestCreateShuffleSinkHelper.

private ShuffleSink runTestCreateShuffleSinkHelper(byte[] shuffleWriterConfig, String shuffleKind, Coder<?> deserializedCoder, FullWindowedValueCoder<?> coder) throws Exception {
    CloudObject spec = CloudObject.forClassName("ShuffleSink");
    addString(spec, "shuffle_writer_config", encodeBase64String(shuffleWriterConfig));
    addString(spec, "shuffle_kind", shuffleKind);
    PipelineOptions options = PipelineOptionsFactory.create();
    ShuffleSinkFactory factory = new ShuffleSinkFactory();
    Sink<?> sink = factory.create(spec, deserializedCoder, options, BatchModeExecutionContext.forTesting(options, "testStage"), TestOperationContext.create());
    assertThat(sink, new IsInstanceOf(ShuffleSink.class));
    ShuffleSink shuffleSink = (ShuffleSink) sink;
    Assert.assertArrayEquals(shuffleWriterConfig, shuffleSink.shuffleWriterConfig);
    Assert.assertEquals(coder, shuffleSink.windowedElemCoder);
    return shuffleSink;
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) IsInstanceOf(org.hamcrest.core.IsInstanceOf)
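
A hedged sketch of a caller for this helper; the writer-config bytes and the "ungrouped" shuffle kind are illustrative assumptions, and the same windowed coder is passed both as the declared coder and as the expected one:

@Test
public void testCreateUngroupingShuffleSink() throws Exception {
    FullWindowedValueCoder<String> coder =
        WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder());
    // Expect the factory to hand back a ShuffleSink configured with the raw writer config
    // and the windowed element coder supplied here.
    runTestCreateShuffleSinkHelper(new byte[] {(byte) 0xE1}, "ungrouped", coder, coder);
}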

Aggregations

CloudObject (org.apache.beam.runners.dataflow.util.CloudObject): 62
ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 23
Test (org.junit.Test): 21
Source (com.google.api.services.dataflow.model.Source): 15
ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction): 13
InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput): 12
ParDoFn (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn): 11
OutputReceiver (org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver): 10
PipelineOptions (org.apache.beam.sdk.options.PipelineOptions): 10
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 10
ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction): 9
HashMap (java.util.HashMap): 9
InstructionInput (com.google.api.services.dataflow.model.InstructionInput): 8
Map (java.util.Map): 8
ArrayList (java.util.ArrayList): 7
Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 7
SdkComponents (org.apache.beam.runners.core.construction.SdkComponents): 6
Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString): 6
ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList): 6
List (java.util.List): 5