use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.
the class AvroByteSinkFactoryTest method runTestCreateAvroSink.
/**
 * Builds an "AvroSink" cloud spec carrying the given filename and asks a fresh
 * {@link AvroByteSinkFactory} to create the sink for it with the supplied coder.
 *
 * @param filename value stored under the "filename" property of the spec
 * @param coder coder handed to the factory
 * @return the sink produced by the factory
 */
private Sink<?> runTestCreateAvroSink(String filename, Coder<?> coder) throws Exception {
    CloudObject sinkSpec = CloudObject.forClassName("AvroSink");
    addString(sinkSpec, "filename", filename);

    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

    // Hand the factory result straight back; no intermediate local is needed.
    return new AvroByteSinkFactory()
        .create(
            sinkSpec,
            coder,
            pipelineOptions,
            BatchModeExecutionContext.forTesting(pipelineOptions, "testStage"),
            TestOperationContext.create());
}
use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method createReadInstruction.
/**
 * Builds a {@link ParallelInstruction} that wraps a {@link ReadInstruction} whose source spec is
 * derived from the given reader factory class.
 *
 * @param name used as the instruction's system name and, with an "OriginalName" suffix, as its
 *     original name
 * @param readerFactoryClass class the source's {@link CloudObject} spec is created for
 * @return the instruction with a single output named "read_output_name"
 */
static ParallelInstruction createReadInstruction(String name, Class<? extends ReaderFactory> readerFactoryClass) {
    // Source: spec comes from the reader factory class, codec is the shared windowed string coder.
    Source source = new Source();
    source.setSpec(CloudObject.forClass(readerFactoryClass));
    source.setCodec(windowedStringCoder);

    ReadInstruction read = new ReadInstruction();
    read.setSource(source);

    // The single output produced by the read.
    InstructionOutput readOutput = new InstructionOutput();
    readOutput.setName("read_output_name");
    readOutput.setOriginalName("originalName");
    readOutput.setSystemName("systemName");
    readOutput.setCodec(windowedStringCoder);

    ParallelInstruction result = new ParallelInstruction();
    result.setRead(read);
    result.setOutputs(Arrays.asList(readOutput));
    result.setSystemName(name);
    result.setOriginalName(name + "OriginalName");
    return result;
}
use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method testCreatePartialGroupByKeyOperationWithCombine.
@Test
public void testCreatePartialGroupByKeyOperationWithCombine() throws Exception {
    ParallelInstruction pgbkInstruction =
        createPartialGroupByKeyInstruction(/* producerIndex= */ 1, /* producerOutputNum= */ 2);

    // Attach a serialized integer-sum combine fn as the value-combining fn of the instruction.
    AppliedCombineFn<?, ?, ?, ?> sumFn =
        AppliedCombineFn.withInputCoder(
            Sum.ofIntegers(),
            CoderRegistry.createDefault(),
            KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));
    CloudObject combineFnSpec = CloudObject.forClassName("CombineFn");
    addString(
        combineFnSpec,
        PropertyNames.SERIALIZED_FN,
        byteArrayToJsonString(serializeToByteArray(sumFn)));
    pgbkInstruction.getPartialGroupByKey().setValueCombiningFn(combineFnSpec);

    ParallelInstructionNode pgbkNode =
        ParallelInstructionNode.create(pgbkInstruction, ExecutionLocation.UNKNOWN);

    // Stub the network so the instruction node has exactly one successor: the receiver node
    // derived from the instruction's first output.
    Node receiverNode =
        IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet)
            .apply(
                InstructionOutputNode.create(
                    pgbkNode.getParallelInstruction().getOutputs().get(0), PCOLLECTION_ID));
    when(network.successors(pgbkNode)).thenReturn(ImmutableSet.<Node>of(receiverNode));
    when(network.outDegree(pgbkNode)).thenReturn(1);

    Node operationNode =
        mapTaskExecutorFactory
            .createOperationTransformForParallelInstructionNodes(
                STAGE,
                network,
                options,
                readerRegistry,
                sinkRegistry,
                BatchModeExecutionContext.forTesting(options, counterSet, "testStage"))
            .apply(pgbkNode);

    // The transform must yield an OperationNode that wraps a ParDoOperation.
    assertThat(operationNode, instanceOf(OperationNode.class));
    OperationNode typedNode = (OperationNode) operationNode;
    assertThat(typedNode.getOperation(), instanceOf(ParDoOperation.class));

    ParDoOperation parDo = (ParDoOperation) typedNode.getOperation();
    assertEquals(1, parDo.receivers.length);
    assertEquals(0, parDo.receivers[0].getReceiverCount());
    assertEquals(Operation.InitializationState.UNSTARTED, parDo.initializationState);
}
use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method createParDoInstruction.
/**
 * Builds a {@link ParallelInstruction} wrapping a {@link ParDoInstruction} whose user fn is a
 * serialized {@code TestDoFn}.
 *
 * @param producerIndex producer instruction index set on the ParDo's input
 * @param producerOutputNum output number set on the ParDo's input
 * @param systemName system name of the instruction; also the prefix of the output name and of
 *     the instruction's original name
 * @param userName name set on the instruction
 * @return the instruction with one output and one multi-output info tagged "1"
 */
static ParallelInstruction createParDoInstruction(int producerIndex, int producerOutputNum, String systemName, String userName) {
    // Serialize the DoFnInfo and wrap it in a "DoFn" cloud object.
    byte[] fnBytes =
        SerializableUtils.serializeToByteArray(
            DoFnInfo.forFn(
                new TestDoFn(),
                WindowingStrategy.globalDefault(),
                /* side input views */ null,
                /* input coder */ null,
                /* main output id */ new TupleTag<>(PropertyNames.OUTPUT),
                DoFnSchemaInformation.create(),
                Collections.emptyMap()));
    CloudObject userFnSpec = CloudObject.forClassName("DoFn");
    addString(userFnSpec, PropertyNames.SERIALIZED_FN, StringUtils.byteArrayToJsonString(fnBytes));

    // The single output of the instruction.
    InstructionOutput parDoOutput = new InstructionOutput();
    parDoOutput.setName(systemName + "_output");
    parDoOutput.setOriginalName("originalName");
    parDoOutput.setSystemName("systemName");
    parDoOutput.setCodec(windowedStringCoder);

    // Input wiring back to the producing instruction.
    InstructionInput input = new InstructionInput();
    input.setProducerInstructionIndex(producerIndex);
    input.setOutputNum(producerOutputNum);

    MultiOutputInfo mainOutputInfo = new MultiOutputInfo();
    mainOutputInfo.setTag("1");

    ParDoInstruction parDo = new ParDoInstruction();
    parDo.setUserFn(userFnSpec);
    parDo.setInput(input);
    parDo.setNumOutputs(1);
    parDo.setMultiOutputInfos(ImmutableList.of(mainOutputInfo));

    ParallelInstruction result = new ParallelInstruction();
    result.setName(userName);
    result.setSystemName(systemName);
    result.setOriginalName(systemName + "OriginalName");
    result.setParDo(parDo);
    result.setOutputs(Arrays.asList(parDoOutput));
    return result;
}
use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.
the class DefaultParDoFnFactoryTest method testCreateSimpleParDoFn.
/**
 * Tests that a {@link SimpleParDoFn} is correctly dispatched to {@code UserParDoFnFactory} and
 * instantiated correctly.
 *
 * <p>A {@code TestDoFn} carrying known field values is serialized into a "DoFn" cloud object,
 * the default factory is asked to create a ParDoFn from it, and the resulting
 * {@link SimpleParDoFn} is checked for the expected DoFn type, windowing strategy and
 * deserialized field values.
 */
@Test
public void testCreateSimpleParDoFn() throws Exception {
    // A serialized DoFn
    String stringFieldValue = "some state";
    long longFieldValue = 42L;
    TestDoFn fn = new TestDoFn(stringFieldValue, longFieldValue);
    String serializedFn =
        StringUtils.byteArrayToJsonString(
            SerializableUtils.serializeToByteArray(
                DoFnInfo.forFn(
                    fn,
                    WindowingStrategy.globalDefault(),
                    /* side input views */ null,
                    /* input coder */ null,
                    /* main output */ new TupleTag<>("output"),
                    DoFnSchemaInformation.create(),
                    Collections.emptyMap())));
    CloudObject cloudUserFn = CloudObject.forClassName("DoFn");
    addString(cloudUserFn, "serialized_fn", serializedFn);

    // Create the ParDoFn from the serialized DoFn
    ParDoFn parDoFn =
        DEFAULT_FACTORY.create(
            DEFAULT_OPTIONS,
            cloudUserFn,
            null,
            MAIN_OUTPUT,
            ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
            DEFAULT_EXECUTION_CONTEXT,
            TestOperationContext.create(counterSet));

    // Test that the factory created the correct class
    assertThat(parDoFn, instanceOf(SimpleParDoFn.class));

    // TODO: move the asserts below into new tests in UserParDoFnFactoryTest, and this test should
    // simply assert that DefaultParDoFnFactory.create() matches UserParDoFnFactory.create()

    // Test that the DoFnInfo reflects the one passed in
    SimpleParDoFn simpleParDoFn = (SimpleParDoFn) parDoFn;
    parDoFn.startBundle(new OutputReceiver());
    // DoFnInfo may not yet be initialized until an element is processed
    parDoFn.processElement(WindowedValue.valueInGlobalWindow("foo"));
    @SuppressWarnings("rawtypes") DoFnInfo doFnInfo = simpleParDoFn.getDoFnInfo();
    // Deliberately NOT cast to TestDoFn here: casting before the assertion would turn a wrong
    // type into a ClassCastException and make the instanceOf check below unable to fail.
    DoFn innerDoFn = doFnInfo.getDoFn();
    assertThat(innerDoFn, instanceOf(TestDoFn.class));
    assertThat(doFnInfo.getWindowingStrategy().getWindowFn(), instanceOf(GlobalWindows.class));
    assertThat(doFnInfo.getWindowingStrategy().getTrigger(), instanceOf(DefaultTrigger.class));

    // Test that the deserialized user DoFn is as expected
    TestDoFn actualTestDoFn = (TestDoFn) innerDoFn;
    assertEquals(stringFieldValue, actualTestDoFn.stringField);
    assertEquals(longFieldValue, actualTestDoFn.longField);
}
Aggregations