Search in sources:

Example 31 with Source

Use of com.google.api.services.dataflow.model.Source in the Apache Beam project.

From the class IsmSideInputReaderTest, method newIsmSource.

/**
 * Builds a {@link Source} describing an ISM file.
 *
 * <p>The codec is the cloud-object form of the full windowed-value coder wrapping {@code coder}
 * with {@code GLOBAL_WINDOW_CODER}. The spec holds the object type name {@code "IsmSource"}
 * and the file name.
 *
 * @param coder the ISM record coder used to derive the source codec
 * @param tmpFilePath path stored under {@code WorkerPropertyNames.FILENAME} in the spec
 * @return the newly populated source
 */
private <K, V> Source newIsmSource(IsmRecordCoder<WindowedValue<V>> coder, String tmpFilePath) {
    Source ismSource = new Source();
    ismSource.setCodec(
        CloudObjects.asCloudObject(
            WindowedValue.getFullCoder(coder, GLOBAL_WINDOW_CODER), /*sdkComponents=*/ null));

    // Populate the spec up front, then attach it to the source in one step.
    Map<String, Object> spec = new HashMap<>();
    spec.put(PropertyNames.OBJECT_TYPE_NAME, "IsmSource");
    spec.put(WorkerPropertyNames.FILENAME, tmpFilePath);
    ismSource.setSpec(spec);
    return ismSource;
}
Also used : Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Source(com.google.api.services.dataflow.model.Source)

Example 32 with Source

Use of com.google.api.services.dataflow.model.Source in the Apache Beam project.

From the class IsmSideInputReaderTest, method testMultimapViewInWindow.

/**
 * Reads a multimap view spread over three interval windows and two input files, from several
 * concurrent tasks, and checks both the returned contents and that the returned view objects
 * are the same references across repeated reads and across tasks.
 */
@Test
public void testMultimapViewInWindow() throws Exception {
    // byte[] keys are used on purpose: they force structural equality testing instead of
    // Java equality testing. To define a duplicate key for the multimap, the very same
    // byte[] instance is put twice.
    byte[] duplicateKey = new byte[] { 0x01 };
    Coder<WindowedValue<Long>> valueCoder = WindowedValue.getFullCoder(VarLongCoder.of(), INTERVAL_WINDOW_CODER);

    final ListMultimap<byte[], WindowedValue<Long>> firstWindow =
        ImmutableListMultimap.<byte[], WindowedValue<Long>>builder()
            .put(new byte[] { 0x00 }, valueInIntervalWindow(12L, 10))
            .put(duplicateKey, valueInIntervalWindow(22L, 10))
            .put(duplicateKey, valueInIntervalWindow(23L, 10))
            .put(new byte[] { 0x02 }, valueInIntervalWindow(32L, 10))
            .build();
    final ListMultimap<byte[], WindowedValue<Long>> secondWindow =
        ImmutableListMultimap.<byte[], WindowedValue<Long>>builder()
            .put(new byte[] { 0x00 }, valueInIntervalWindow(42L, 20))
            .put(new byte[] { 0x03 }, valueInIntervalWindow(52L, 20))
            .put(new byte[] { 0x02 }, valueInIntervalWindow(62L, 20))
            .build();
    final ListMultimap<byte[], WindowedValue<Long>> thirdWindow =
        ImmutableListMultimap.<byte[], WindowedValue<Long>>builder()
            .put(new byte[] { 0x02 }, valueInIntervalWindow(73L, 30))
            .put(new byte[] { 0x04 }, valueInIntervalWindow(82L, 30))
            .put(new byte[] { 0x05 }, valueInIntervalWindow(92L, 30))
            .build();

    final PCollectionView<MultimapView<byte[], WindowedValue<Long>>> view =
        DataflowPortabilityPCollectionView.with(
            new TupleTag<>(),
            FullWindowedValueCoder.of(KvCoder.of(ByteArrayCoder.of(), valueCoder), INTERVAL_WINDOW_CODER));
    IsmRecordCoder<WindowedValue<Long>> ismCoder =
        IsmRecordCoder.of(
            1,
            0,
            ImmutableList.of(ByteArrayCoder.of(), INTERVAL_WINDOW_CODER, BigEndianLongCoder.of()),
            valueCoder);

    Multimap<Integer, IsmRecord<WindowedValue<Long>>> elementsPerShard = forMap(ismCoder, firstWindow);
    elementsPerShard.putAll(forMap(ismCoder, secondWindow));
    elementsPerShard.putAll(forMap(ismCoder, thirdWindow));

    // Records under even keys go to the first list, records under odd keys to the second.
    List<IsmRecord<WindowedValue<Long>>> firstElements = new ArrayList<>();
    List<IsmRecord<WindowedValue<Long>>> secondElements = new ArrayList<>();
    for (Map.Entry<Integer, Collection<IsmRecord<WindowedValue<Long>>>> shard : elementsPerShard.asMap().entrySet()) {
        List<IsmRecord<WindowedValue<Long>>> bucket = shard.getKey() % 2 == 0 ? firstElements : secondElements;
        bucket.addAll(shard.getValue());
    }
    // Ensure that each file will have some records.
    checkState(!firstElements.isEmpty());
    checkState(!secondElements.isEmpty());

    Source sourceA = initInputFile(firstElements, ismCoder);
    Source sourceB = initInputFile(secondElements, ismCoder);
    final IsmSideInputReader reader = sideInputReader(view.getTagInternal().getId(), sourceA, sourceB);

    Callable<Map<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>>> task = () -> {
        // Hold strong references to the returned values so that the logical reference
        // cache is not cleared for this test.
        MultimapView<byte[], WindowedValue<Long>> viewAt10 = reader.get(view, intervalWindow(10));
        MultimapView<byte[], WindowedValue<Long>> viewAt20 = reader.get(view, intervalWindow(20));
        MultimapView<byte[], WindowedValue<Long>> viewAt30 = reader.get(view, intervalWindow(30));

        for (Map.Entry<byte[], Collection<WindowedValue<Long>>> expected : firstWindow.asMap().entrySet()) {
            verifyIterable(expected.getValue(), viewAt10.get(expected.getKey()));
        }
        for (Map.Entry<byte[], Collection<WindowedValue<Long>>> expected : secondWindow.asMap().entrySet()) {
            verifyIterable(expected.getValue(), viewAt20.get(expected.getKey()));
        }
        for (Map.Entry<byte[], Collection<WindowedValue<Long>>> expected : thirdWindow.asMap().entrySet()) {
            verifyIterable(expected.getValue(), viewAt30.get(expected.getKey()));
        }

        // A repeated read must hand back the same reference, showing that it was cached.
        assertSame(viewAt10, reader.get(view, intervalWindow(10)));
        assertSame(viewAt20, reader.get(view, intervalWindow(20)));
        assertSame(viewAt30, reader.get(view, intervalWindow(30)));
        return ImmutableMap.of(intervalWindow(10), viewAt10, intervalWindow(20), viewAt20, intervalWindow(30), viewAt30);
    };

    List<Callable<Map<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>>>> tasks = new ArrayList<>();
    for (int i = 0; i < 3; ++i) {
        tasks.add(task);
    }

    List<Future<Map<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>>>> results = pipelineOptions.getExecutorService().invokeAll(tasks);
    Map<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>> baseline = results.get(0).get();
    // Assert that all threads got back the same reference
    for (Future<Map<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>>> future : results) {
        Map<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>> observed = future.get();
        assertEquals(baseline, observed);
        for (Map.Entry<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>> perWindow : observed.entrySet()) {
            assertSame(baseline.get(perWindow.getKey()), perWindow.getValue());
        }
    }
}
Also used : MultimapView(org.apache.beam.sdk.transforms.Materializations.MultimapView) ArrayList(java.util.ArrayList) IsmRecord(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord) Source(com.google.api.services.dataflow.model.Source) Callable(java.util.concurrent.Callable) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Collection(java.util.Collection) Future(java.util.concurrent.Future) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) TreeMap(java.util.TreeMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 33 with Source

Use of com.google.api.services.dataflow.model.Source in the Apache Beam project.

From the class IsmSideInputReaderTest, method testSingletonMultimapInWindow.

/**
 * Reads a multimap-shaped singleton side input for two populated windows and one window with
 * no data, from {@code NUM_THREADS} concurrent tasks, and checks the contents, the caching
 * of returned references, and that every task observed the same references.
 */
@Test
public void testSingletonMultimapInWindow() throws Exception {
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0L), new Instant(100L));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(50L), new Instant(150L));
    IntervalWindow emptyWindow = new IntervalWindow(new Instant(75L), new Instant(175L));

    // Collection is iterable, and this is immutable
    @SuppressWarnings({ "unchecked", "rawtypes" })
    final Map<IntervalWindow, WindowedValue<Map<String, Iterable<Long>>>> elements =
        ImmutableMap.<IntervalWindow, WindowedValue<Map<String, Iterable<Long>>>>builder()
            .put(
                firstWindow,
                WindowedValue.of(
                    (Map) ImmutableListMultimap.<String, Long>builder().put("foo", 0L).put("foo", 2L).put("bar", -1L).build().asMap(),
                    new Instant(7),
                    firstWindow,
                    PaneInfo.NO_FIRING))
            .put(
                secondWindow,
                WindowedValue.of(
                    (Map) ImmutableListMultimap.<String, Long>builder().put("bar", -1L).put("baz", 1L).put("baz", 3L).build().asMap(),
                    new Instant(53L),
                    secondWindow,
                    PaneInfo.NO_FIRING))
            .build();

    StringUtf8Coder keyCoder = StringUtf8Coder.of();
    Coder<Map<String, Iterable<Long>>> mapCoder = MapCoder.of(keyCoder, IterableCoder.of(VarLongCoder.of()));
    final PCollectionView<Map<String, Iterable<Long>>> view =
        Pipeline.create()
            .apply(Create.empty(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())))
            .apply(Window.into(FixedWindows.of(Duration.millis(100L))))
            .apply(View.asMultimap());
    IsmRecordCoder<WindowedValue<Map<String, Iterable<Long>>>> recordCoder =
        IsmRecordCoder.of(
            1,
            0,
            ImmutableList.<Coder<?>>of(INTERVAL_WINDOW_CODER),
            WindowedValue.getFullCoder(mapCoder, INTERVAL_WINDOW_CODER));

    final Source source = initInputFile(fromValues(elements.values()), recordCoder);
    final IsmSideInputReader reader = sideInputReader(view.getTagInternal().getId(), source);

    Callable<Map<BoundedWindow, Map<String, Iterable<Long>>>> task = () -> {
        // Hold strong references to the returned values so that the logical reference
        // cache is not cleared for this test.
        Map<String, Iterable<Long>> firstValue = reader.get(view, firstWindow);
        assertEquals(elements.get(firstWindow).getValue(), firstValue);
        // A repeated read must hand back the same reference, showing that it was cached.
        assertSame(firstValue, reader.get(view, firstWindow));

        Map<String, Iterable<Long>> secondValue = reader.get(view, secondWindow);
        assertEquals(elements.get(secondWindow).getValue(), secondValue);
        // A repeated read must hand back the same reference, showing that it was cached.
        assertSame(secondValue, reader.get(view, secondWindow));

        Map<String, Iterable<Long>> emptyValue = reader.get(view, emptyWindow);
        assertThat(emptyValue.keySet(), empty());

        return ImmutableMap.<BoundedWindow, Map<String, Iterable<Long>>>builder()
            .put(firstWindow, firstValue)
            .put(secondWindow, secondValue)
            .put(emptyWindow, emptyValue)
            .build();
    };

    List<Callable<Map<BoundedWindow, Map<String, Iterable<Long>>>>> tasks = new ArrayList<>();
    for (int i = 0; i < NUM_THREADS; ++i) {
        tasks.add(task);
    }

    List<Future<Map<BoundedWindow, Map<String, Iterable<Long>>>>> results = pipelineOptions.getExecutorService().invokeAll(tasks);
    Map<BoundedWindow, Map<String, Iterable<Long>>> baseline = results.get(0).get();
    // Every task's result must equal the first one, entry by entry with identical references.
    for (Future<Map<BoundedWindow, Map<String, Iterable<Long>>>> future : results) {
        Map<BoundedWindow, Map<String, Iterable<Long>>> observed = future.get();
        assertEquals(baseline, observed);
        for (Map.Entry<BoundedWindow, Map<String, Iterable<Long>>> perWindow : observed.entrySet()) {
            assertSame(baseline.get(perWindow.getKey()), perWindow.getValue());
        }
    }
}
Also used : ArrayList(java.util.ArrayList) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Source(com.google.api.services.dataflow.model.Source) Callable(java.util.concurrent.Callable) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) Future(java.util.concurrent.Future) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) TreeMap(java.util.TreeMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 34 with Source

Use of com.google.api.services.dataflow.model.Source in the Apache Beam project.

From the class IsmSideInputReaderTest, method testSingletonInWindow.

/**
 * Reads a windowed singleton side input backed by two input files from {@code NUM_THREADS}
 * concurrent tasks. Checks the per-window values, the default returned for a window without
 * data, the caching of returned references, and that every task saw the same references.
 */
@Test
public void testSingletonInWindow() throws Exception {
    Coder<WindowedValue<Long>> valueCoder = WindowedValue.getFullCoder(VarLongCoder.of(), INTERVAL_WINDOW_CODER);
    IsmRecordCoder<WindowedValue<Long>> ismCoder =
        IsmRecordCoder.of(1, 0, ImmutableList.<Coder<?>>of(INTERVAL_WINDOW_CODER), valueCoder);
    final List<WindowedValue<Long>> elements =
        Arrays.asList(
            valueInIntervalWindow(12, 0),
            valueInIntervalWindow(17, 10),
            valueInIntervalWindow(28, 20));
    final Long defaultValue = 42L;
    final PCollectionView<Long> view =
        Pipeline.create()
            .apply(Create.empty(VarLongCoder.of()))
            .apply(Window.into(FixedWindows.of(Duration.millis(1))))
            .apply(View.<Long>asSingleton().withDefaultValue(defaultValue));

    // The first record goes into one file, the remaining two into another.
    Source sourceA = initInputFile(fromValues(elements).subList(0, 1), ismCoder);
    Source sourceB = initInputFile(fromValues(elements).subList(1, 3), ismCoder);
    final IsmSideInputReader reader = sideInputReader(view.getTagInternal().getId(), sourceA, sourceB);

    Callable<Map<BoundedWindow, Long>> task = () -> {
        Map<BoundedWindow, Long> valuesByWindow = new HashMap<>();
        for (WindowedValue<Long> expected : elements) {
            // Hold a strong reference to the returned value so that the logical reference
            // cache is not cleared for this test.
            Long actual = reader.get(view, windowOf(expected));
            assertEquals(expected.getValue(), actual);
            // A repeated read must hand back the same reference, showing that it was cached.
            assertSame(actual, reader.get(view, windowOf(expected)));
            valuesByWindow.put(windowOf(expected), actual);
        }
        // A window for which no value is found must yield the default.
        assertEquals(defaultValue, reader.get(view, intervalWindow(30)));
        return valuesByWindow;
    };

    List<Callable<Map<BoundedWindow, Long>>> tasks = new ArrayList<>();
    for (int i = 0; i < NUM_THREADS; ++i) {
        tasks.add(task);
    }

    List<Future<Map<BoundedWindow, Long>>> results = pipelineOptions.getExecutorService().invokeAll(tasks);
    Map<BoundedWindow, Long> baseline = results.get(0).get();
    // Assert that all threads got back the same reference
    for (Future<Map<BoundedWindow, Long>> future : results) {
        Map<BoundedWindow, Long> observed = future.get();
        assertEquals(baseline, observed);
        for (Map.Entry<BoundedWindow, Long> perWindow : observed.entrySet()) {
            assertSame(baseline.get(perWindow.getKey()), perWindow.getValue());
        }
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Source(com.google.api.services.dataflow.model.Source) Callable(java.util.concurrent.Callable) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Future(java.util.concurrent.Future) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) TreeMap(java.util.TreeMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 35 with Source

Use of com.google.api.services.dataflow.model.Source in the Apache Beam project.

From the class IsmSideInputReaderTest, method testSingletonMapInWindow.

/**
 * Reads a map-shaped singleton side input for two populated windows and one window with no
 * data, from {@code NUM_THREADS} concurrent tasks, and checks the contents, the caching of
 * returned references, and that every task observed the same references.
 */
@Test
public void testSingletonMapInWindow() throws Exception {
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0L), new Instant(100L));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(50L), new Instant(150L));
    IntervalWindow emptyWindow = new IntervalWindow(new Instant(75L), new Instant(175L));

    final Map<IntervalWindow, WindowedValue<Map<String, Long>>> elements =
        ImmutableMap.<IntervalWindow, WindowedValue<Map<String, Long>>>builder()
            .put(
                firstWindow,
                WindowedValue.of(
                    ImmutableMap.<String, Long>builder().put("foo", 0L).put("bar", -1L).build(),
                    new Instant(7),
                    firstWindow,
                    PaneInfo.NO_FIRING))
            .put(
                secondWindow,
                WindowedValue.of(
                    ImmutableMap.<String, Long>builder().put("bar", -1L).put("baz", 1L).build(),
                    new Instant(53L),
                    secondWindow,
                    PaneInfo.NO_FIRING))
            .build();

    Coder<Map<String, Long>> mapCoder = MapCoder.of(StringUtf8Coder.of(), VarLongCoder.of());
    final PCollectionView<Map<String, Long>> view =
        Pipeline.create()
            .apply(Create.empty(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())))
            .apply(Window.into(SlidingWindows.of(Duration.millis(100L)).every(Duration.millis(50L))))
            .apply(View.asMap());
    IsmRecordCoder<WindowedValue<Map<String, Long>>> recordCoder =
        IsmRecordCoder.of(
            1,
            0,
            ImmutableList.<Coder<?>>of(INTERVAL_WINDOW_CODER),
            WindowedValue.getFullCoder(mapCoder, INTERVAL_WINDOW_CODER));

    final Source source = initInputFile(fromValues(elements.values()), recordCoder);
    final IsmSideInputReader reader = sideInputReader(view.getTagInternal().getId(), source);

    Callable<Map<BoundedWindow, Map<String, Long>>> task = () -> {
        // Hold strong references to the returned values so that the logical reference
        // cache is not cleared for this test.
        Map<String, Long> firstValue = reader.get(view, firstWindow);
        assertEquals(elements.get(firstWindow).getValue(), firstValue);
        // A repeated read must hand back the same reference, showing that it was cached.
        assertSame(firstValue, reader.get(view, firstWindow));

        Map<String, Long> secondValue = reader.get(view, secondWindow);
        assertEquals(elements.get(secondWindow).getValue(), secondValue);
        // A repeated read must hand back the same reference, showing that it was cached.
        assertSame(secondValue, reader.get(view, secondWindow));

        Map<String, Long> emptyValue = reader.get(view, emptyWindow);
        assertThat(emptyValue.keySet(), empty());

        return ImmutableMap.<BoundedWindow, Map<String, Long>>builder()
            .put(firstWindow, firstValue)
            .put(secondWindow, secondValue)
            .put(emptyWindow, emptyValue)
            .build();
    };

    List<Callable<Map<BoundedWindow, Map<String, Long>>>> tasks = new ArrayList<>();
    for (int i = 0; i < NUM_THREADS; ++i) {
        tasks.add(task);
    }

    List<Future<Map<BoundedWindow, Map<String, Long>>>> results = pipelineOptions.getExecutorService().invokeAll(tasks);
    // Assert that all threads got back the same reference
    Map<BoundedWindow, Map<String, Long>> baseline = results.get(0).get();
    for (Future<Map<BoundedWindow, Map<String, Long>>> future : results) {
        Map<BoundedWindow, Map<String, Long>> observed = future.get();
        assertEquals(baseline, observed);
        for (Map.Entry<BoundedWindow, Map<String, Long>> perWindow : observed.entrySet()) {
            assertSame(baseline.get(perWindow.getKey()), perWindow.getValue());
        }
    }
}
Also used : Instant(org.joda.time.Instant) ArrayList(java.util.ArrayList) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Source(com.google.api.services.dataflow.model.Source) Callable(java.util.concurrent.Callable) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Future(java.util.concurrent.Future) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) TreeMap(java.util.TreeMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Aggregations

Source (com.google.api.services.dataflow.model.Source)51 Test (org.junit.Test)31 ArrayList (java.util.ArrayList)20 WindowedValue (org.apache.beam.sdk.util.WindowedValue)18 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)16 Map (java.util.Map)15 Callable (java.util.concurrent.Callable)15 Future (java.util.concurrent.Future)15 HashMap (java.util.HashMap)13 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)12 SortedMap (java.util.SortedMap)11 TreeMap (java.util.TreeMap)11 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)8 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)7 ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction)6 KV (org.apache.beam.sdk.values.KV)6 Collection (java.util.Collection)5 List (java.util.List)5 IsmRecord (org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord)5 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)5