
Example 46 with CloudObject

Use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

From the class DefaultParDoFnFactoryTest, the method testCreateUnknownParDoFn:

@Test
public void testCreateUnknownParDoFn() throws Exception {
    // A bogus serialized DoFn
    CloudObject cloudUserFn = CloudObject.forClassName("UnknownKindOfDoFn");
    try {
        DEFAULT_FACTORY.create(DEFAULT_OPTIONS, cloudUserFn, null, MAIN_OUTPUT,
            ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
            DEFAULT_EXECUTION_CONTEXT, TestOperationContext.create(counterSet));
        fail("should have thrown an exception");
    } catch (Exception exn) {
        assertThat(exn.toString(), Matchers.containsString("No known ParDoFnFactory"));
    }
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) Test(org.junit.Test)
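
The failure above hinges on the class name stamped into the CloudObject: the factory dispatches on that name, and an unregistered name raises the "No known ParDoFnFactory" error. Below is a minimal sketch of that name round-trip, assuming only the CloudObject API already shown in these examples; the class name CloudObjectNameSketch is illustrative.

import org.apache.beam.runners.dataflow.util.CloudObject;

public class CloudObjectNameSketch {
    public static void main(String[] args) {
        // forClassName stamps the spec with its class-name key...
        CloudObject spec = CloudObject.forClassName("UnknownKindOfDoFn");
        // ...and getClassName reads it back. A name with no registered
        // ParDoFnFactory is exactly what the test above relies on.
        System.out.println(spec.getClassName()); // UnknownKindOfDoFn
    }
}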

Example 47 with CloudObject

Use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

From the class CombineValuesFnFactoryTest, the method createCombineValuesFn:

private <K, InputT, AccumT, OutputT> ParDoFn createCombineValuesFn(
        String phase, Combine.CombineFn<InputT, AccumT, OutputT> combineFn, Coder<K> keyCoder,
        Coder<InputT> inputCoder, Coder<AccumT> accumCoder,
        WindowingStrategy<?, ?> windowingStrategy) throws Exception {
    // This partially mirrors the work that
    // org.apache.beam.runners.dataflow.worker.transforms.Combine.translateHelper
    // does, at least for the KeyedCombineFn. The phase is generated
    // by the back-end.
    CloudObject spec = CloudObject.forClassName("CombineValuesFn");
    @SuppressWarnings("unchecked")
    AppliedCombineFn appliedCombineFn = AppliedCombineFn.withAccumulatorCoder(
        combineFn, accumCoder, Collections.emptyList(),
        KvCoder.of(keyCoder, inputCoder), windowingStrategy);
    addString(spec, PropertyNames.SERIALIZED_FN,
        byteArrayToJsonString(serializeToByteArray(appliedCombineFn)));
    addString(spec, WorkerPropertyNames.PHASE, phase);
    return parDoFnFactory.create(
        PipelineOptionsFactory.create(), spec, ImmutableList.<SideInputInfo>of(), MAIN_OUTPUT,
        ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
        BatchModeExecutionContext.forTesting(PipelineOptionsFactory.create(), "testStage"),
        TestOperationContext.create());
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) AppliedCombineFn(org.apache.beam.sdk.util.AppliedCombineFn)
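
The SERIALIZED_FN property above holds a Java-serialized AppliedCombineFn encoded as a JSON-safe string. Here is a hedged sketch of just that encoding step, assuming byteArrayToJsonString and serializeToByteArray are the Beam SDK helpers StringUtils and SerializableUtils, as their names suggest:

import java.io.Serializable;
import org.apache.beam.sdk.util.SerializableUtils;
import org.apache.beam.sdk.util.StringUtils;

public class SerializedFnSketch {
    public static void main(String[] args) {
        // Any Serializable stands in here for the AppliedCombineFn above.
        Serializable fn = (Runnable & Serializable) () -> { };
        byte[] bytes = SerializableUtils.serializeToByteArray(fn);
        String json = StringUtils.byteArrayToJsonString(bytes);
        // The worker decodes the same way before deserializing the fn.
        byte[] roundTrip = StringUtils.jsonStringToByteArray(json);
        System.out.println(bytes.length == roundTrip.length); // true
    }
}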

Example 48 with CloudObject

Use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

From the class ConcatReaderFactoryTest, the method createSourcesWithInMemorySources:

Source createSourcesWithInMemorySources(List<List<String>> allData) {
    List<Map<String, Object>> sourcesList = new ArrayList<>();
    Source source = new Source();
    for (List<String> data : allData) {
        CloudObject inMemorySourceSpec = CloudObject.forClassName("InMemorySource");
        Map<String, Object> inMemorySourceDictionary = new HashMap<>();
        addStringList(inMemorySourceSpec, WorkerPropertyNames.ELEMENTS, data);
        addLong(inMemorySourceSpec, WorkerPropertyNames.START_INDEX, 0L);
        addLong(inMemorySourceSpec, WorkerPropertyNames.END_INDEX, data.size());
        inMemorySourceDictionary.put(PropertyNames.SOURCE_SPEC, inMemorySourceSpec);
        CloudObject textSourceEncoding =
            CloudObjects.asCloudObject(StringUtf8Coder.of(), /* sdkComponents= */ null);
        inMemorySourceDictionary.put(PropertyNames.ENCODING, textSourceEncoding);
        sourcesList.add(inMemorySourceDictionary);
    }
    CloudObject spec = CloudObject.forClassName("ConcatSource");
    addList(spec, WorkerPropertyNames.CONCAT_SOURCE_SOURCES, sourcesList);
    source.setSpec(spec);
    return source;
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Map(java.util.Map) Source(com.google.api.services.dataflow.model.Source)
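
Each entry in sourcesList above is a two-key dictionary: the source spec plus the element encoding. A minimal sketch of building one such entry follows, with literal map keys standing in for the PropertyNames and WorkerPropertyNames constants; the literal strings here are illustrative, not confirmed wire names.

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.beam.runners.dataflow.util.CloudObject;
import static org.apache.beam.runners.dataflow.util.Structs.addLong;
import static org.apache.beam.runners.dataflow.util.Structs.addStringList;

public class InMemorySourceEntrySketch {
    public static void main(String[] args) {
        List<String> data = Arrays.asList("a", "b", "c");
        CloudObject spec = CloudObject.forClassName("InMemorySource");
        // Hypothetical literal keys; the test uses WorkerPropertyNames constants.
        addStringList(spec, "elements", data);
        addLong(spec, "start_index", 0L);
        addLong(spec, "end_index", data.size());
        // The test also stores a coder under PropertyNames.ENCODING.
        Map<String, Object> entry = new HashMap<>();
        entry.put("source_spec", spec);
        System.out.println(entry);
    }
}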

Example 49 with CloudObject

Use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

From the class StreamingDataflowWorkerTest, the method runMergeSessionsActions:

// Helper for running tests for merging sessions based upon Actions consisting of GetWorkResponse
// and expected timers and holds in the corresponding commit. All GetData requests are responded
// to with empty state, relying on user worker caching to keep data written.
private void runMergeSessionsActions(List<Action> actions) throws Exception {
    Coder<KV<String, String>> kvCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
    Coder<WindowedValue<KV<String, String>>> windowedKvCoder = FullWindowedValueCoder.of(kvCoder, IntervalWindow.getCoder());
    KvCoder<String, List<String>> groupedCoder = KvCoder.of(StringUtf8Coder.of(), ListCoder.of(StringUtf8Coder.of()));
    Coder<WindowedValue<KV<String, List<String>>>> windowedGroupedCoder = FullWindowedValueCoder.of(groupedCoder, IntervalWindow.getCoder());
    CloudObject spec = CloudObject.forClassName("MergeWindowsDoFn");
    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
    addString(spec, PropertyNames.SERIALIZED_FN,
        StringUtils.byteArrayToJsonString(
            WindowingStrategyTranslation.toMessageProto(
                    WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
                        .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
                        .withTrigger(Repeatedly.forever(
                            AfterWatermark.pastEndOfWindow()
                                .withLateFirings(AfterPane.elementCountAtLeast(1))))
                        .withAllowedLateness(Duration.standardMinutes(60)),
                    sdkComponents)
                .toByteArray()));
    addObject(spec, WorkerPropertyNames.INPUT_CODER,
        CloudObjects.asCloudObject(windowedKvCoder, /* sdkComponents= */ null));
    ParallelInstruction mergeWindowsInstruction =
        new ParallelInstruction()
            .setSystemName("MergeWindows-System")
            .setName("MergeWindowsStep")
            .setOriginalName("MergeWindowsOriginal")
            .setParDo(new ParDoInstruction()
                .setInput(new InstructionInput().setProducerInstructionIndex(0).setOutputNum(0))
                .setNumOutputs(1)
                .setUserFn(spec))
            .setOutputs(Arrays.asList(new InstructionOutput()
                .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
                .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME)
                .setName("output")
                .setCodec(CloudObjects.asCloudObject(
                    windowedGroupedCoder, /* sdkComponents= */ null))));
    List<ParallelInstruction> instructions = Arrays.asList(makeWindowingSourceInstruction(kvCoder), mergeWindowsInstruction, makeSinkInstruction(groupedCoder, 1));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    StreamingDataflowWorker worker = makeWorker(instructions, createTestingPipelineOptions(server), false);
    Map<String, String> nameMap = new HashMap<>();
    nameMap.put("MergeWindowsStep", "MergeWindows");
    worker.addStateNameMappings(nameMap);
    worker.start();
    // Respond to any GetData requests with empty state.
    for (int i = 0; i < 1000; ++i) {
        server.addDataFnToOffer(EMPTY_DATA_RESPONDER);
    }
    for (int i = 0; i < actions.size(); ++i) {
        Action action = actions.get(i);
        server.addWorkToOffer(action.response);
        Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(1);
        WorkItemCommitRequest actualOutput = result.get(i + 1L);
        assertThat(actualOutput, Matchers.not(Matchers.nullValue()));
        verifyTimers(actualOutput, action.expectedTimers);
        verifyHolds(actualOutput, action.expectedHolds);
    }
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) WindowedValue(org.apache.beam.sdk.util.WindowedValue) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) InstructionInput(com.google.api.services.dataflow.model.InstructionInput)
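
The least readable part of the test above is the windowing strategy buried inside the addString call. Here is the same strategy reconstructed on its own, as a readability aid rather than new behavior, so the session semantics are visible:

import org.apache.beam.sdk.transforms.windowing.AfterPane;
import org.apache.beam.sdk.transforms.windowing.AfterWatermark;
import org.apache.beam.sdk.transforms.windowing.Repeatedly;
import org.apache.beam.sdk.transforms.windowing.Sessions;
import org.apache.beam.sdk.values.WindowingStrategy;
import org.apache.beam.sdk.values.WindowingStrategy.AccumulationMode;
import org.joda.time.Duration;

public class SessionStrategySketch {
    public static void main(String[] args) {
        WindowingStrategy<?, ?> strategy =
            // Sessions close after a 10 ms gap with no new elements.
            WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
                // Each firing emits only data that arrived since the last pane.
                .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
                // Fire at the end of the window, then again per late element.
                .withTrigger(Repeatedly.forever(
                    AfterWatermark.pastEndOfWindow()
                        .withLateFirings(AfterPane.elementCountAtLeast(1))))
                // Keep window state for an hour of allowed lateness.
                .withAllowedLateness(Duration.standardMinutes(60));
        System.out.println(strategy);
    }
}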

Example 50 with CloudObject

Use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.

From the class StreamingDataflowWorkerTest, the method testAssignWindows:

@Test
public void testAssignWindows() throws Exception {
    Duration gapDuration = Duration.standardSeconds(1);
    CloudObject spec = CloudObject.forClassName("AssignWindowsDoFn");
    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
    addString(spec, PropertyNames.SERIALIZED_FN,
        StringUtils.byteArrayToJsonString(
            WindowingStrategyTranslation.toMessageProto(
                    WindowingStrategy.of(FixedWindows.of(gapDuration)), sdkComponents)
                .toByteArray()));
    ParallelInstruction addWindowsInstruction =
        new ParallelInstruction()
            .setSystemName("AssignWindows")
            .setName("AssignWindows")
            .setOriginalName("AssignWindowsOriginal")
            .setParDo(new ParDoInstruction()
                .setInput(new InstructionInput().setProducerInstructionIndex(0).setOutputNum(0))
                .setNumOutputs(1)
                .setUserFn(spec))
            .setOutputs(Arrays.asList(new InstructionOutput()
                .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
                .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME)
                .setName("output")
                .setCodec(CloudObjects.asCloudObject(
                    WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder()),
                    /* sdkComponents= */ null))));
    List<ParallelInstruction> instructions = Arrays.asList(makeSourceInstruction(StringUtf8Coder.of()), addWindowsInstruction, makeSinkInstruction(StringUtf8Coder.of(), 1));
    FakeWindmillServer server = new FakeWindmillServer(errorCollector);
    int timestamp1 = 0;
    int timestamp2 = 1000000;
    server.addWorkToOffer(makeInput(timestamp1, timestamp1));
    server.addWorkToOffer(makeInput(timestamp2, timestamp2));
    StreamingDataflowWorker worker = makeWorker(instructions, createTestingPipelineOptions(server), false);
    worker.start();
    Map<Long, Windmill.WorkItemCommitRequest> result = server.waitForAndGetCommits(2);
    assertThat(result.get((long) timestamp1), equalTo(setMessagesMetadata(PaneInfo.NO_FIRING, intervalWindowBytes(WINDOW_AT_ZERO), makeExpectedOutput(timestamp1, timestamp1)).build()));
    assertThat(result.get((long) timestamp2), equalTo(setMessagesMetadata(PaneInfo.NO_FIRING, intervalWindowBytes(WINDOW_AT_ONE_SECOND), makeExpectedOutput(timestamp2, timestamp2)).build()));
}
Also used : InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) Duration(org.joda.time.Duration) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) WorkItemCommitRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) InstructionInput(com.google.api.services.dataflow.model.InstructionInput) Test(org.junit.Test)
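
The spec construction above compresses the proto translation into one expression. Below is a hedged sketch of the same steps in isolation, assuming the runners-core-construction API used by the test (SdkComponents, Environments, WindowingStrategyTranslation); the class name AssignWindowsSpecSketch is illustrative.

import org.apache.beam.runners.core.construction.Environments;
import org.apache.beam.runners.core.construction.SdkComponents;
import org.apache.beam.runners.core.construction.WindowingStrategyTranslation;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.util.StringUtils;
import org.apache.beam.sdk.values.WindowingStrategy;
import org.joda.time.Duration;

public class AssignWindowsSpecSketch {
    public static void main(String[] args) throws Exception {
        SdkComponents components = SdkComponents.create();
        components.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
        // Translate the strategy to its portable proto form...
        byte[] bytes =
            WindowingStrategyTranslation.toMessageProto(
                    WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(1))),
                    components)
                .toByteArray();
        // ...then JSON-encode the bytes for the CloudObject's SERIALIZED_FN.
        System.out.println(StringUtils.byteArrayToJsonString(bytes));
    }
}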

Aggregations

Types used together with CloudObject across the collected examples, with usage counts:

CloudObject (org.apache.beam.runners.dataflow.util.CloudObject): 62
ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 23
Test (org.junit.Test): 21
Source (com.google.api.services.dataflow.model.Source): 15
ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction): 13
InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput): 12
ParDoFn (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn): 11
OutputReceiver (org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver): 10
PipelineOptions (org.apache.beam.sdk.options.PipelineOptions): 10
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 10
ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction): 9
HashMap (java.util.HashMap): 9
InstructionInput (com.google.api.services.dataflow.model.InstructionInput): 8
Map (java.util.Map): 8
ArrayList (java.util.ArrayList): 7
Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 7
SdkComponents (org.apache.beam.runners.core.construction.SdkComponents): 6
Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString): 6
ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList): 6
List (java.util.List): 5