Search in sources:

Example 41 with CloudObject

use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

the class AvroByteSinkFactoryTest method runTestCreateAvroSink.

/**
 * Builds an "AvroSink" cloud spec that points at {@code filename} and asks a fresh
 * {@link AvroByteSinkFactory} to create the sink with default pipeline options.
 *
 * @param filename value stored under the spec's "filename" property
 * @param coder coder handed to the factory unchanged
 * @return the sink produced by the factory
 */
private Sink<?> runTestCreateAvroSink(String filename, Coder<?> coder) throws Exception {
    CloudObject sinkSpec = CloudObject.forClassName("AvroSink");
    addString(sinkSpec, "filename", filename);

    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    return new AvroByteSinkFactory()
        .create(
            sinkSpec,
            coder,
            pipelineOptions,
            BatchModeExecutionContext.forTesting(pipelineOptions, "testStage"),
            TestOperationContext.create());
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions)

Example 42 with CloudObject

use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

the class IntrinsicMapTaskExecutorFactoryTest method createReadInstruction.

/**
 * Creates a {@link ParallelInstruction} that wraps a {@link ReadInstruction} whose source spec
 * is derived from {@code readerFactoryClass}, with a single output named "read_output_name".
 *
 * @param name used as the instruction's system name and as the prefix of its original name
 * @param readerFactoryClass class from which the source's {@link CloudObject} spec is built
 */
static ParallelInstruction createReadInstruction(String name, Class<? extends ReaderFactory> readerFactoryClass) {
    // Source: spec comes from the reader factory class, elements use the shared test coder.
    Source source = new Source();
    source.setSpec(CloudObject.forClass(readerFactoryClass));
    source.setCodec(windowedStringCoder);

    ReadInstruction read = new ReadInstruction();
    read.setSource(source);

    // The single output of the read step.
    InstructionOutput readOutput = new InstructionOutput();
    readOutput.setName("read_output_name");
    readOutput.setCodec(windowedStringCoder);
    readOutput.setOriginalName("originalName");
    readOutput.setSystemName("systemName");

    ParallelInstruction result = new ParallelInstruction();
    result.setSystemName(name);
    result.setOriginalName(name + "OriginalName");
    result.setRead(read);
    result.setOutputs(Arrays.asList(readOutput));
    return result;
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) ReadInstruction(com.google.api.services.dataflow.model.ReadInstruction) Source(com.google.api.services.dataflow.model.Source)

Example 43 with CloudObject

use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

the class IntrinsicMapTaskExecutorFactoryTest method testCreatePartialGroupByKeyOperationWithCombine.

/**
 * Checks that a partial-group-by-key instruction carrying a serialized combine fn is turned
 * into an {@link OperationNode} holding an unstarted {@link ParDoOperation} with exactly one
 * receiver that has no downstream receivers attached.
 */
@Test
public void testCreatePartialGroupByKeyOperationWithCombine() throws Exception {
    int upstreamIndex = 1;
    int upstreamOutputNum = 2;
    ParallelInstruction pgbkInstruction =
        createPartialGroupByKeyInstruction(upstreamIndex, upstreamOutputNum);

    // Attach a serialized integer-sum combine fn to the instruction.
    AppliedCombineFn<?, ?, ?, ?> appliedSum =
        AppliedCombineFn.withInputCoder(
            Sum.ofIntegers(),
            CoderRegistry.createDefault(),
            KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));
    CloudObject combineFnSpec = CloudObject.forClassName("CombineFn");
    addString(
        combineFnSpec,
        PropertyNames.SERIALIZED_FN,
        byteArrayToJsonString(serializeToByteArray(appliedSum)));
    pgbkInstruction.getPartialGroupByKey().setValueCombiningFn(combineFnSpec);

    ParallelInstructionNode pgbkNode =
        ParallelInstructionNode.create(pgbkInstruction, ExecutionLocation.UNKNOWN);

    // Stub the mocked network: the node has one successor, the receiver node for its first output.
    when(network.successors(pgbkNode))
        .thenReturn(
            ImmutableSet.<Node>of(
                IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet)
                    .apply(
                        InstructionOutputNode.create(
                            pgbkNode.getParallelInstruction().getOutputs().get(0),
                            PCOLLECTION_ID))));
    when(network.outDegree(pgbkNode)).thenReturn(1);

    Node created =
        mapTaskExecutorFactory
            .createOperationTransformForParallelInstructionNodes(
                STAGE,
                network,
                options,
                readerRegistry,
                sinkRegistry,
                BatchModeExecutionContext.forTesting(options, counterSet, "testStage"))
            .apply(pgbkNode);

    assertThat(created, instanceOf(OperationNode.class));
    OperationNode createdOperationNode = (OperationNode) created;
    assertThat(createdOperationNode.getOperation(), instanceOf(ParDoOperation.class));

    ParDoOperation parDo = (ParDoOperation) createdOperationNode.getOperation();
    assertEquals(1, parDo.receivers.length);
    assertEquals(0, parDo.receivers[0].getReceiverCount());
    assertEquals(Operation.InitializationState.UNSTARTED, parDo.initializationState);
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) Test(org.junit.Test)

Example 44 with CloudObject

use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

the class IntrinsicMapTaskExecutorFactoryTest method createParDoInstruction.

/**
 * Creates a {@link ParallelInstruction} wrapping a {@link ParDoInstruction} whose user fn is a
 * serialized {@code TestDoFn}, reading from the given producer output and exposing one output.
 *
 * @param producerIndex index of the producing instruction feeding this ParDo
 * @param producerOutputNum output number of the producer to read from
 * @param systemName system name of the instruction; also prefixes the output name and the
 *     instruction's original name
 * @param userName value stored as the instruction's name
 */
static ParallelInstruction createParDoInstruction(int producerIndex, int producerOutputNum, String systemName, String userName) {
    InstructionInput input = new InstructionInput();
    input.setProducerInstructionIndex(producerIndex);
    input.setOutputNum(producerOutputNum);

    // Serialize the DoFn together with its windowing and output metadata, then JSON-encode it.
    TestDoFn doFn = new TestDoFn();
    byte[] doFnInfoBytes =
        SerializableUtils.serializeToByteArray(
            DoFnInfo.forFn(
                doFn,
                WindowingStrategy.globalDefault(),
                null /* side input views */,
                null /* input coder */,
                new TupleTag<>(PropertyNames.OUTPUT) /* main output id */,
                DoFnSchemaInformation.create(),
                Collections.emptyMap()));
    String encodedFn = StringUtils.byteArrayToJsonString(doFnInfoBytes);

    CloudObject userFnSpec = CloudObject.forClassName("DoFn");
    addString(userFnSpec, PropertyNames.SERIALIZED_FN, encodedFn);

    MultiOutputInfo mainOutputInfo = new MultiOutputInfo();
    mainOutputInfo.setTag("1");

    ParDoInstruction parDo = new ParDoInstruction();
    parDo.setInput(input);
    parDo.setNumOutputs(1);
    parDo.setMultiOutputInfos(ImmutableList.of(mainOutputInfo));
    parDo.setUserFn(userFnSpec);

    InstructionOutput parDoOutput = new InstructionOutput();
    parDoOutput.setName(systemName + "_output");
    parDoOutput.setCodec(windowedStringCoder);
    parDoOutput.setOriginalName("originalName");
    parDoOutput.setSystemName("systemName");

    ParallelInstruction result = new ParallelInstruction();
    result.setParDo(parDo);
    result.setOutputs(Arrays.asList(parDoOutput));
    result.setSystemName(systemName);
    result.setOriginalName(systemName + "OriginalName");
    result.setName(userName);
    return result;
}
Also used : ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) MultiOutputInfo(com.google.api.services.dataflow.model.MultiOutputInfo) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) InstructionInput(com.google.api.services.dataflow.model.InstructionInput) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString)

Example 45 with CloudObject

use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

the class DefaultParDoFnFactoryTest method testCreateSimpleParDoFn.

/**
 * Tests that a {@link SimpleParDoFn} is correctly dispatched to {@code UserParDoFnFactory} and
 * instantiated correctly.
 *
 * <p>The test serializes a {@code TestDoFn} carrying known field values, wraps it in a "DoFn"
 * {@link CloudObject}, creates the {@link ParDoFn} through the default factory, and then checks
 * the class of the result, the windowing strategy of its {@link DoFnInfo}, and the field values
 * of the deserialized user fn.
 */
@Test
public void testCreateSimpleParDoFn() throws Exception {
    // A serialized DoFn
    String stringFieldValue = "some state";
    long longFieldValue = 42L;
    TestDoFn fn = new TestDoFn(stringFieldValue, longFieldValue);
    String serializedFn =
        StringUtils.byteArrayToJsonString(
            SerializableUtils.serializeToByteArray(
                DoFnInfo.forFn(
                    fn,
                    WindowingStrategy.globalDefault(),
                    null /* side input views */,
                    null /* input coder */,
                    new TupleTag<>("output") /* main output */,
                    DoFnSchemaInformation.create(),
                    Collections.emptyMap())));
    CloudObject cloudUserFn = CloudObject.forClassName("DoFn");
    addString(cloudUserFn, "serialized_fn", serializedFn);
    // Create the ParDoFn from the serialized DoFn
    ParDoFn parDoFn =
        DEFAULT_FACTORY.create(
            DEFAULT_OPTIONS,
            cloudUserFn,
            null,
            MAIN_OUTPUT,
            ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
            DEFAULT_EXECUTION_CONTEXT,
            TestOperationContext.create(counterSet));
    // Test that the factory created the correct class
    assertThat(parDoFn, instanceOf(SimpleParDoFn.class));
    // TODO: move the asserts below into new tests in UserParDoFnFactoryTest, and this test should
    // simply assert that DefaultParDoFnFactory.create() matches UserParDoFnFactory.create()
    // Test that the DoFnInfo reflects the one passed in
    SimpleParDoFn simpleParDoFn = (SimpleParDoFn) parDoFn;
    parDoFn.startBundle(new OutputReceiver());
    // DoFnInfo may not yet be initialized until an element is processed
    parDoFn.processElement(WindowedValue.valueInGlobalWindow("foo"));
    @SuppressWarnings("rawtypes") DoFnInfo doFnInfo = simpleParDoFn.getDoFnInfo();
    // Deliberately NOT cast to TestDoFn here: casting first would throw ClassCastException on a
    // wrong type and make the instanceOf assertion below impossible to fail.
    DoFn innerDoFn = doFnInfo.getDoFn();
    assertThat(innerDoFn, instanceOf(TestDoFn.class));
    assertThat(doFnInfo.getWindowingStrategy().getWindowFn(), instanceOf(GlobalWindows.class));
    assertThat(doFnInfo.getWindowingStrategy().getTrigger(), instanceOf(DefaultTrigger.class));
    // Test that the deserialized user DoFn is as expected
    TestDoFn actualTestDoFn = (TestDoFn) innerDoFn;
    assertEquals(stringFieldValue, actualTestDoFn.stringField);
    assertEquals(longFieldValue, actualTestDoFn.longField);
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) DoFnInfo(org.apache.beam.sdk.util.DoFnInfo) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) DoFn(org.apache.beam.sdk.transforms.DoFn) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) DefaultTrigger(org.apache.beam.sdk.transforms.windowing.DefaultTrigger) Test(org.junit.Test)

Aggregations

CloudObject (org.apache.beam.runners.dataflow.util.CloudObject) 62; ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction) 23; Test (org.junit.Test) 21; Source (com.google.api.services.dataflow.model.Source) 15; ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction) 13; InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput) 12; ParDoFn (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) 11; OutputReceiver (org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) 10; PipelineOptions (org.apache.beam.sdk.options.PipelineOptions) 10; ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) 10; ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction) 9; HashMap (java.util.HashMap) 9; InstructionInput (com.google.api.services.dataflow.model.InstructionInput) 8; Map (java.util.Map) 8; ArrayList (java.util.ArrayList) 7; Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString) 7; SdkComponents (org.apache.beam.runners.core.construction.SdkComponents) 6; Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString) 6; ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) 6; List (java.util.List) 5