
Example 21 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class WorkerCustomSourcesTest method testSplittingProducedInvalidSource.

@Test
public void testSplittingProducedInvalidSource() throws Exception {
    com.google.api.services.dataflow.model.Source cloudSource = translateIOToCloudSource(new SourceProducingInvalidSplits("original", null), options);
    expectedException.expect(IllegalArgumentException.class);
    expectedException.expectMessage(allOf(containsString("Splitting a valid source produced an invalid source"), containsString("original"), containsString("badBundle")));
    expectedException.expectCause(hasMessage(containsString("intentionally invalid")));
    performSplit(
        cloudSource, options,
        null, /* desiredBundleSizeBytes */
        null, /* numBundles limit */
        null);
}
Also used : Source(com.google.api.services.dataflow.model.Source) Test(org.junit.Test)
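
The test above relies on a JUnit ExpectedException rule that the snippet does not show. A minimal sketch of the declaration it assumes, with only the field name taken from the snippet and everything else standard JUnit 4:

import org.junit.Rule;
import org.junit.rules.ExpectedException;

public class WorkerCustomSourcesTest {

    // Rule assumed by the test above: it lets the test declare the expected exception
    // type, message fragments, and cause before invoking the code under test.
    @Rule
    public ExpectedException expectedException = ExpectedException.none();
}

The expect, expectMessage, and expectCause calls in the test body configure this rule before performSplit runs.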

Example 22 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class WorkerCustomSourcesTest method testFailureToStartReadingIncludesSourceDetails.

@Test
public void testFailureToStartReadingIncludesSourceDetails() throws Exception {
    com.google.api.services.dataflow.model.Source source = translateIOToCloudSource(new SourceProducingFailingReader(), options);
    // The error is wrapped in other exceptions, and there is no convenient matcher for a nested
    // Exception object. So instead we use Throwables.getStackTraceAsString and match on that.
    try {
        readElemsFromSource(options, source);
        fail("Expected to fail");
    } catch (Exception e) {
        assertThat(getStackTraceAsString(e), allOf(containsString("Intentional error"), containsString("Some description")));
    }
}
Also used : Source(com.google.api.services.dataflow.model.Source) IOException(java.io.IOException) NoSuchElementException(java.util.NoSuchElementException) ExpectedException(org.junit.rules.ExpectedException) Test(org.junit.Test)
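
The assertion pattern above, matching on the flattened stack-trace string instead of unwrapping the nested cause, works for any deeply wrapped failure. A self-contained sketch of the same idea outside the Beam test harness, using plain Guava and Hamcrest rather than Beam's vendored copies; the failing helper and its messages are illustrative, reusing the strings from the test:

import static com.google.common.base.Throwables.getStackTraceAsString;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.containsString;
import static org.junit.Assert.fail;

import org.junit.Test;

public class StackTraceMatchingTest {

    @Test
    public void failureDetailsAreVisibleInStackTrace() {
        try {
            doSomethingThatFails();
            fail("Expected the call to fail");
        } catch (Exception e) {
            // Rather than unwrapping each cause, flatten the whole chain to a string and
            // assert that the interesting fragments appear somewhere in it.
            assertThat(
                getStackTraceAsString(e),
                allOf(containsString("Some description"), containsString("Intentional error")));
        }
    }

    // Hypothetical helper standing in for readElemsFromSource: it fails with a nested cause.
    private static void doSomethingThatFails() {
        throw new RuntimeException(
            "Some description", new IllegalStateException("Intentional error"));
    }
}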

Example 23 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class WorkerCustomSourcesTest method testUnboundedSplits.

@Test
public void testUnboundedSplits() throws Exception {
    com.google.api.services.dataflow.model.Source source = serializeToCloudSource(new TestCountingSource(Integer.MAX_VALUE), options);
    List<String> serializedSplits = getStrings(source.getSpec(), WorkerCustomSources.SERIALIZED_SOURCE_SPLITS, null);
    assertEquals(20, serializedSplits.size());
    for (String serializedSplit : serializedSplits) {
        assertTrue(deserializeFromByteArray(decodeBase64(serializedSplit), "source") instanceof TestCountingSource);
    }
}
Also used : TestCountingSource(org.apache.beam.runners.dataflow.worker.testing.TestCountingSource) Source(com.google.api.services.dataflow.model.Source) Matchers.containsString(org.hamcrest.Matchers.containsString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Throwables.getStackTraceAsString(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables.getStackTraceAsString) Test(org.junit.Test)
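
The decodeBase64 and deserializeFromByteArray helpers used above come from the test's static imports; the round trip they perform is, in essence, Java serialization plus base64 text encoding. A minimal JDK-only sketch of that round trip; FakeSplit and everything around it is illustrative, not a Beam type:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Base64;

public class SerializedSplitRoundTrip {

    // Stand-in for a serialized source split: any Serializable value works.
    static class FakeSplit implements Serializable {
        final int shard;
        FakeSplit(int shard) { this.shard = shard; }
    }

    public static void main(String[] args) throws Exception {
        // Serialize the object and base64-encode it, roughly how a split ends up
        // stored as a string inside the Source spec.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
            out.writeObject(new FakeSplit(7));
        }
        String encoded = Base64.getEncoder().encodeToString(bytes.toByteArray());

        // Decode and deserialize, mirroring decodeBase64 + deserializeFromByteArray in the test.
        byte[] decoded = Base64.getDecoder().decode(encoded);
        try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(decoded))) {
            Object restored = in.readObject();
            System.out.println(restored instanceof FakeSplit); // prints true
        }
    }
}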

Example 24 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class WorkerCustomSourcesTest method stepToCloudSource.

private static com.google.api.services.dataflow.model.Source stepToCloudSource(Step step) throws Exception {
    com.google.api.services.dataflow.model.Source res = dictionaryToCloudSource(getDictionary(step.getProperties(), PropertyNames.SOURCE_STEP_INPUT));
    // Encoding is specified in the step, not in the source itself.  This is
    // normal: incoming Dataflow API Source objects in map tasks will have the
    // encoding filled in from the step's output encoding.
    @SuppressWarnings("unchecked") List<Map<String, Object>> outputInfo = (List<Map<String, Object>>) step.getProperties().get(PropertyNames.OUTPUT_INFO);
    CloudObject encoding = CloudObject.fromSpec(getObject(outputInfo.get(0), PropertyNames.ENCODING));
    res.setCodec(encoding);
    return res;
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) Source(com.google.api.services.dataflow.model.Source) ArrayList(java.util.ArrayList) List(java.util.List) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) Structs.getObject(org.apache.beam.runners.dataflow.util.Structs.getObject) Matchers.containsString(org.hamcrest.Matchers.containsString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Throwables.getStackTraceAsString(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables.getStackTraceAsString) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map)
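
The com.google.api.services.dataflow.model classes are generated JSON wrappers, so a Source is essentially a holder for spec and codec maps. A rough sketch of assembling one by hand and attaching the encoding afterwards, mirroring what stepToCloudSource does; the map contents here are placeholder values, not a real Dataflow spec:

import com.google.api.services.dataflow.model.Source;
import java.util.HashMap;
import java.util.Map;

public class BuildCloudSource {

    public static void main(String[] args) {
        // The spec describes what kind of source this is; the type name here is invented.
        Map<String, Object> spec = new HashMap<>();
        spec.put("@type", "ExampleCustomSource");

        // The codec describes how elements are encoded; again an illustrative value.
        Map<String, Object> codec = new HashMap<>();
        codec.put("@type", "kind:bytes");

        Source source = new Source();
        source.setSpec(spec);
        // As in stepToCloudSource above, the encoding is attached to the Source separately,
        // because it normally comes from the step's output info rather than the source spec.
        source.setCodec(codec);

        System.out.println(source);
    }
}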

Example 25 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class LengthPrefixUnknownCoders method forSideInputInfos.

/**
 * Wraps unknown coders on every {@link SideInputInfo} with length prefixes and also replaces the
 * wrapped coder with a byte array coder if requested.
 */
public static List<SideInputInfo> forSideInputInfos(List<SideInputInfo> sideInputInfos, boolean replaceWithByteArrayCoder) {
    ImmutableList.Builder<SideInputInfo> updatedSideInputInfos = ImmutableList.builder();
    for (SideInputInfo sideInputInfo : sideInputInfos) {
        try {
            SideInputInfo updatedSideInputInfo = clone(sideInputInfo, SideInputInfo.class);
            for (Source source : updatedSideInputInfo.getSources()) {
                source.setCodec(forCodec(source.getCodec(), replaceWithByteArrayCoder));
            }
            updatedSideInputInfos.add(updatedSideInputInfo);
        } catch (IOException e) {
            throw new RuntimeException(String.format("Failed to replace unknown coder with " + "LengthPrefixCoder for : {%s}", sideInputInfo), e);
        }
    }
    return updatedSideInputInfos.build();
}
Also used : SideInputInfo(com.google.api.services.dataflow.model.SideInputInfo) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) IOException(java.io.IOException) Source(com.google.api.services.dataflow.model.Source)
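
A rough usage sketch of the method above: build a SideInputInfo whose source carries some coder spec, run it through forSideInputInfos, and inspect the rewritten codec. The coder spec value is illustrative, whether a given coder counts as "unknown" is decided by forCodec inside LengthPrefixUnknownCoders, and the package path of that class is assumed here:

import com.google.api.services.dataflow.model.SideInputInfo;
import com.google.api.services.dataflow.model.Source;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// Package path assumed for the Dataflow worker's LengthPrefixUnknownCoders utility.
import org.apache.beam.runners.dataflow.worker.graph.LengthPrefixUnknownCoders;

public class LengthPrefixSideInputsExample {

    public static void main(String[] args) {
        // Illustrative coder spec: a coder class the worker does not recognize natively.
        Map<String, Object> unknownCodec = new HashMap<>();
        unknownCodec.put("@type", "com.example.MyCustomCoder"); // hypothetical coder class name

        Source source = new Source();
        source.setCodec(unknownCodec);

        SideInputInfo sideInputInfo = new SideInputInfo();
        sideInputInfo.setTag("exampleSideInput");
        sideInputInfo.setSources(Collections.singletonList(source));

        // Wrap unknown coders with a length prefix; the original SideInputInfo is left
        // untouched and a rewritten copy is returned.
        List<SideInputInfo> updated =
            LengthPrefixUnknownCoders.forSideInputInfos(
                Collections.singletonList(sideInputInfo), false);

        System.out.println(updated.get(0).getSources().get(0).getCodec());
    }
}

Passing true as the second argument additionally replaces the wrapped coder with a byte array coder, as the javadoc above describes.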

Aggregations

Source (com.google.api.services.dataflow.model.Source) 51
Test (org.junit.Test) 31
ArrayList (java.util.ArrayList) 20
WindowedValue (org.apache.beam.sdk.util.WindowedValue) 18
CloudObject (org.apache.beam.runners.dataflow.util.CloudObject) 16
Map (java.util.Map) 15
Callable (java.util.concurrent.Callable) 15
Future (java.util.concurrent.Future) 15
HashMap (java.util.HashMap) 13
ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) 12
SortedMap (java.util.SortedMap) 11
TreeMap (java.util.TreeMap) 11
BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow) 8
ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction) 7
ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction) 6
KV (org.apache.beam.sdk.values.KV) 6
Collection (java.util.Collection) 5
List (java.util.List) 5
IsmRecord (org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord) 5
Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString) 5