Search in sources :

Example 41 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class ConcatReaderFactoryTest method testCreateConcatReaderWithManySubSources.

/**
 * Creates a reader through the default {@link ReaderRegistry} for a source assembled from
 * in-memory data generated with arguments {@code (15, 10)} (presumably 15 sub-sources of 10
 * elements each; confirm against {@code createInMemorySourceData}), then checks that reading it
 * yields every generated element, in any order.
 */
@Test
public void testCreateConcatReaderWithManySubSources() throws Exception {
    final List<List<String>> subSourceData = createInMemorySourceData(15, 10);
    final Source concatSource = createSourcesWithInMemorySources(subSourceData);

    @SuppressWarnings("unchecked")
    NativeReader<String> concatReader =
        (NativeReader<String>) ReaderRegistry.defaultRegistry().create(concatSource, null, null, null);
    assertNotNull(concatReader);

    // Flatten the per-sub-source lists into the single collection the reader should produce.
    final List<String> flattened = new ArrayList<>();
    subSourceData.forEach(flattened::addAll);

    assertThat(readAllFromReader(concatReader), containsInAnyOrder(flattened.toArray()));
}
Also used : NativeReader(org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader) ArrayList(java.util.ArrayList) Structs.addStringList(org.apache.beam.runners.dataflow.util.Structs.addStringList) ArrayList(java.util.ArrayList) List(java.util.List) Structs.addList(org.apache.beam.runners.dataflow.util.Structs.addList) Source(com.google.api.services.dataflow.model.Source) Test(org.junit.Test)

Example 42 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class ConcatReaderFactoryTest method testCreateConcatReaderWithOneSubSource.

/**
 * Creates a reader through the default {@link ReaderRegistry} for a source assembled from
 * in-memory data generated with arguments {@code (1, 10)} (presumably a single sub-source of 10
 * elements; confirm against {@code createInMemorySourceData}), then checks that reading it
 * yields every generated element, in any order.
 */
@Test
public void testCreateConcatReaderWithOneSubSource() throws Exception {
    final List<List<String>> subSourceData = createInMemorySourceData(1, 10);
    final Source concatSource = createSourcesWithInMemorySources(subSourceData);

    @SuppressWarnings("unchecked")
    NativeReader<String> concatReader =
        (NativeReader<String>) ReaderRegistry.defaultRegistry().create(concatSource, null, null, null);
    assertNotNull(concatReader);

    // Flatten the per-sub-source lists into the single collection the reader should produce.
    final List<String> flattened = new ArrayList<>();
    subSourceData.forEach(flattened::addAll);

    assertThat(readAllFromReader(concatReader), containsInAnyOrder(flattened.toArray()));
}
Also used : NativeReader(org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader) ArrayList(java.util.ArrayList) Structs.addStringList(org.apache.beam.runners.dataflow.util.Structs.addStringList) ArrayList(java.util.ArrayList) List(java.util.List) Structs.addList(org.apache.beam.runners.dataflow.util.Structs.addList) Source(com.google.api.services.dataflow.model.Source) Test(org.junit.Test)

Example 43 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class StreamingDataflowWorkerTest method makeWindowingSourceInstruction.

/**
 * Builds a {@link ParallelInstruction} that reads from a {@link WindowingWindmillReader} source.
 *
 * <p>The codec attached to both the source and the single output is a
 * {@code kind:windowed_value} wrapper whose components are a timer-or-element coder (itself
 * wrapping the cloud form of {@code coder}) and the cloud form of {@link IntervalWindowCoder}.
 *
 * @param coder element coder nested inside the timer-or-element coder
 * @return the instruction with default source/output names and one output named by the next
 *     value of {@code idGenerator}
 */
private ParallelInstruction makeWindowingSourceInstruction(Coder<?> coder) {
    // Inner coder: timer-or-element over the caller's element coder.
    CloudObject timerOrElementCoder = CloudObject.forClassName("com.google.cloud.dataflow.sdk.util.TimerOrElement$TimerOrElementCoder");
    List<CloudObject> elementComponents = Collections.singletonList(CloudObjects.asCloudObject(coder, /*sdkComponents=*/ null));
    Structs.addList(timerOrElementCoder, PropertyNames.COMPONENT_ENCODINGS, elementComponents);

    // Outer coder: windowed value of (timer-or-element, interval window).
    CloudObject windowedValueCoder = CloudObject.forClassName("kind:windowed_value");
    Structs.addBoolean(windowedValueCoder, PropertyNames.IS_WRAPPER, true);
    Structs.addList(
        windowedValueCoder,
        PropertyNames.COMPONENT_ENCODINGS,
        ImmutableList.of(timerOrElementCoder, CloudObjects.asCloudObject(IntervalWindowCoder.of(), /*sdkComponents=*/ null)));

    Source windmillSource = new Source()
        .setSpec(CloudObject.forClass(WindowingWindmillReader.class))
        .setCodec(windowedValueCoder);
    ReadInstruction read = new ReadInstruction().setSource(windmillSource);

    InstructionOutput output = new InstructionOutput()
        .setName(Long.toString(idGenerator.get()))
        .setCodec(windowedValueCoder)
        .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME)
        .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME);

    return new ParallelInstruction()
        .setSystemName(DEFAULT_SOURCE_SYSTEM_NAME)
        .setOriginalName(DEFAULT_SOURCE_ORIGINAL_NAME)
        .setRead(read)
        .setOutputs(Arrays.asList(output));
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) ReadInstruction(com.google.api.services.dataflow.model.ReadInstruction) TestCountingSource(org.apache.beam.runners.dataflow.worker.testing.TestCountingSource) Source(com.google.api.services.dataflow.model.Source)

Example 44 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class ShuffleReaderFactoryTest method runTestCreateShuffleReader.

/**
 * Creates a reader through the default {@link ReaderRegistry} from a shuffle source spec and
 * asserts that it is an instance of the expected reader class.
 *
 * @param <T> expected reader type
 * @param shuffleReaderConfig config bytes, stored base64-encoded under {@code shuffle_reader_config}
 * @param start value for {@code start_shuffle_position}; the property is omitted when {@code null}
 * @param end value for {@code end_shuffle_position}; the property is omitted when {@code null}
 * @param encoding codec set on the cloud source
 * @param context execution context passed to the registry
 * @param shuffleReaderClass class the created reader must be an instance of
 * @param shuffleSourceAlias class name used for the source spec
 * @return the created reader, typed as {@code shuffleReaderClass}
 * @throws Exception if reader creation fails
 */
<T extends NativeReader> T runTestCreateShuffleReader(byte[] shuffleReaderConfig, @Nullable String start, @Nullable String end, CloudObject encoding, BatchModeExecutionContext context, Class<T> shuffleReaderClass, String shuffleSourceAlias) throws Exception {
    CloudObject spec = CloudObject.forClassName(shuffleSourceAlias);
    addString(spec, "shuffle_reader_config", encodeBase64String(shuffleReaderConfig));
    if (start != null) {
        addString(spec, "start_shuffle_position", start);
    }
    if (end != null) {
        addString(spec, "end_shuffle_position", end);
    }
    Source cloudSource = new Source();
    cloudSource.setSpec(spec);
    cloudSource.setCodec(encoding);
    NativeReader<?> reader = ReaderRegistry.defaultRegistry().create(cloudSource, PipelineOptionsFactory.create(), context, null);
    assertThat(reader, new IsInstanceOf(shuffleReaderClass));
    // Checked cast through the Class token instead of an unchecked (T) cast; the assertion
    // above has already verified the runtime type.
    return shuffleReaderClass.cast(reader);
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) IsInstanceOf(org.hamcrest.core.IsInstanceOf) Source(com.google.api.services.dataflow.model.Source)

Example 45 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class ReaderFactoryTest method testCreateUnknownReader.

/**
 * Asks the default {@link ReaderRegistry} to create a reader for a source whose spec class name
 * is {@code "UnknownSource"} and expects an exception whose string form contains
 * {@code "Unable to create a Reader"}.
 */
@Test
public void testCreateUnknownReader() throws Exception {
    CloudObject unknownSpec = CloudObject.forClassName("UnknownSource");
    CloudObject stringCodec = CloudObjects.asCloudObject(StringUtf8Coder.of(), /*sdkComponents=*/ null);
    Source unknownSource = new Source().setSpec(unknownSpec).setCodec(stringCodec);

    try {
        PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
        BatchModeExecutionContext executionContext =
            BatchModeExecutionContext.forTesting(pipelineOptions, "testStage");
        ReaderRegistry.defaultRegistry().create(unknownSource, pipelineOptions, executionContext, null);
        // Reaching this line means no exception was raised for the unknown source.
        Assert.fail("should have thrown an exception");
    } catch (Exception expected) {
        assertThat(expected.toString(), CoreMatchers.containsString("Unable to create a Reader"));
    }
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Source(com.google.api.services.dataflow.model.Source) IOException(java.io.IOException) NoSuchElementException(java.util.NoSuchElementException) Test(org.junit.Test)

Aggregations

Source (com.google.api.services.dataflow.model.Source): 51, Test (org.junit.Test): 31, ArrayList (java.util.ArrayList): 20, WindowedValue (org.apache.beam.sdk.util.WindowedValue): 18, CloudObject (org.apache.beam.runners.dataflow.util.CloudObject): 16, Map (java.util.Map): 15, Callable (java.util.concurrent.Callable): 15, Future (java.util.concurrent.Future): 15, HashMap (java.util.HashMap): 13, ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap): 12, SortedMap (java.util.SortedMap): 11, TreeMap (java.util.TreeMap): 11, BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 8, ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 7, ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction): 6, KV (org.apache.beam.sdk.values.KV): 6, Collection (java.util.Collection): 5, List (java.util.List): 5, IsmRecord (org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord): 5, Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 5