use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class IsmSideInputReaderTest method newIsmSource.
/**
 * Builds a {@link Source} that describes the ISM file located at {@code tmpFilePath}.
 *
 * <p>The codec is the cloud-object form of the full windowed-value coder obtained by combining
 * {@code coder} with {@code GLOBAL_WINDOW_CODER}. The spec holds two entries: the object type
 * name ({@code "IsmSource"}) and the file name.
 *
 * @param coder the ISM record coder used to derive the source codec
 * @param tmpFilePath path of the ISM file the source should point at
 * @return the newly populated source
 */
private <K, V> Source newIsmSource(IsmRecordCoder<WindowedValue<V>> coder, String tmpFilePath) {
  // Assemble the spec first so that it can be attached to the source in one step.
  Map<String, Object> spec = new HashMap<>();
  spec.put(PropertyNames.OBJECT_TYPE_NAME, "IsmSource");
  spec.put(WorkerPropertyNames.FILENAME, tmpFilePath);

  Source ismSource = new Source();
  ismSource.setCodec(
      CloudObjects.asCloudObject(
          WindowedValue.getFullCoder(coder, GLOBAL_WINDOW_CODER), /*sdkComponents=*/ null));
  ismSource.setSpec(spec);
  return ismSource;
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class IsmSideInputReaderTest method testMultimapViewInWindow.
/**
 * Writes multimap records for three interval windows into two ISM files (split by shard parity)
 * and checks, from several concurrently executed tasks, that the reader returns the expected
 * values per key and window and hands back the same cached view instance on repeated lookups.
 */
@Test
public void testMultimapViewInWindow() throws Exception {
  // byte[] keys are used deliberately: they force structural equality checks instead of plain
  // Java equality. The duplicate multimap key must therefore be the very same byte[] instance.
  byte[] sharedKey = new byte[] { 0x01 };
  Coder<WindowedValue<Long>> windowedLongCoder =
      WindowedValue.getFullCoder(VarLongCoder.of(), INTERVAL_WINDOW_CODER);

  final ListMultimap<byte[], WindowedValue<Long>> window10Entries =
      ImmutableListMultimap.<byte[], WindowedValue<Long>>builder()
          .put(new byte[] { 0x00 }, valueInIntervalWindow(12L, 10))
          .put(sharedKey, valueInIntervalWindow(22L, 10))
          .put(sharedKey, valueInIntervalWindow(23L, 10))
          .put(new byte[] { 0x02 }, valueInIntervalWindow(32L, 10))
          .build();
  final ListMultimap<byte[], WindowedValue<Long>> window20Entries =
      ImmutableListMultimap.<byte[], WindowedValue<Long>>builder()
          .put(new byte[] { 0x00 }, valueInIntervalWindow(42L, 20))
          .put(new byte[] { 0x03 }, valueInIntervalWindow(52L, 20))
          .put(new byte[] { 0x02 }, valueInIntervalWindow(62L, 20))
          .build();
  final ListMultimap<byte[], WindowedValue<Long>> window30Entries =
      ImmutableListMultimap.<byte[], WindowedValue<Long>>builder()
          .put(new byte[] { 0x02 }, valueInIntervalWindow(73L, 30))
          .put(new byte[] { 0x04 }, valueInIntervalWindow(82L, 30))
          .put(new byte[] { 0x05 }, valueInIntervalWindow(92L, 30))
          .build();

  final PCollectionView<MultimapView<byte[], WindowedValue<Long>>> view =
      DataflowPortabilityPCollectionView.with(
          new TupleTag<>(),
          FullWindowedValueCoder.of(
              KvCoder.of(ByteArrayCoder.of(), windowedLongCoder), INTERVAL_WINDOW_CODER));
  IsmRecordCoder<WindowedValue<Long>> ismCoder =
      IsmRecordCoder.of(
          1,
          0,
          ImmutableList.of(ByteArrayCoder.of(), INTERVAL_WINDOW_CODER, BigEndianLongCoder.of()),
          windowedLongCoder);

  // Collect the records of all three windows, grouped by shard.
  Multimap<Integer, IsmRecord<WindowedValue<Long>>> recordsByShard =
      forMap(ismCoder, window10Entries);
  recordsByShard.putAll(forMap(ismCoder, window20Entries));
  recordsByShard.putAll(forMap(ismCoder, window30Entries));

  // Even shards go to the first file, odd shards to the second.
  List<IsmRecord<WindowedValue<Long>>> evenShardRecords = new ArrayList<>();
  List<IsmRecord<WindowedValue<Long>>> oddShardRecords = new ArrayList<>();
  for (Map.Entry<Integer, Collection<IsmRecord<WindowedValue<Long>>>> shard :
      recordsByShard.asMap().entrySet()) {
    List<IsmRecord<WindowedValue<Long>>> target =
        shard.getKey() % 2 == 0 ? evenShardRecords : oddShardRecords;
    target.addAll(shard.getValue());
  }

  // Both files must end up with at least one record.
  checkState(!evenShardRecords.isEmpty());
  checkState(!oddShardRecords.isEmpty());

  Source sourceA = initInputFile(evenShardRecords, ismCoder);
  Source sourceB = initInputFile(oddShardRecords, ismCoder);
  final IsmSideInputReader reader =
      sideInputReader(view.getTagInternal().getId(), sourceA, sourceB);

  List<Callable<Map<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>>>> tasks =
      new ArrayList<>();
  for (int taskIndex = 0; taskIndex < 3; taskIndex++) {
    tasks.add(
        () -> {
          // Hold strong references to the returned views so that the logical reference cache
          // is not cleared while this test runs.
          MultimapView<byte[], WindowedValue<Long>> viewAt10 =
              reader.get(view, intervalWindow(10));
          MultimapView<byte[], WindowedValue<Long>> viewAt20 =
              reader.get(view, intervalWindow(20));
          MultimapView<byte[], WindowedValue<Long>> viewAt30 =
              reader.get(view, intervalWindow(30));

          for (Map.Entry<byte[], Collection<WindowedValue<Long>>> expected :
              window10Entries.asMap().entrySet()) {
            verifyIterable(expected.getValue(), viewAt10.get(expected.getKey()));
          }
          for (Map.Entry<byte[], Collection<WindowedValue<Long>>> expected :
              window20Entries.asMap().entrySet()) {
            verifyIterable(expected.getValue(), viewAt20.get(expected.getKey()));
          }
          for (Map.Entry<byte[], Collection<WindowedValue<Long>>> expected :
              window30Entries.asMap().entrySet()) {
            verifyIterable(expected.getValue(), viewAt30.get(expected.getKey()));
          }

          // A second lookup must yield the identical instance, which shows it was cached.
          assertSame(viewAt10, reader.get(view, intervalWindow(10)));
          assertSame(viewAt20, reader.get(view, intervalWindow(20)));
          assertSame(viewAt30, reader.get(view, intervalWindow(30)));

          return ImmutableMap.of(
              intervalWindow(10), viewAt10,
              intervalWindow(20), viewAt20,
              intervalWindow(30), viewAt30);
        });
  }

  List<Future<Map<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>>>> futures =
      pipelineOptions.getExecutorService().invokeAll(tasks);
  Map<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>> reference =
      futures.get(0).get();

  // Every task must have observed the same view instances as the first one.
  for (Future<Map<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>>> future : futures) {
    Map<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>> perTask = future.get();
    assertEquals(reference, perTask);
    for (Map.Entry<BoundedWindow, MultimapView<byte[], WindowedValue<Long>>> windowView :
        perTask.entrySet()) {
      assertSame(reference.get(windowView.getKey()), windowView.getValue());
    }
  }
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class IsmSideInputReaderTest method testSingletonMultimapInWindow.
/**
 * Stores one multimap-shaped value per window in a single ISM file and checks, from
 * {@code NUM_THREADS} concurrently executed tasks, that the reader returns the stored map for
 * each populated window, an empty map for a window without data, and the same cached instance
 * on repeated lookups.
 */
@Test
public void testSingletonMultimapInWindow() throws Exception {
  IntervalWindow windowA = new IntervalWindow(new Instant(0L), new Instant(100L));
  IntervalWindow windowB = new IntervalWindow(new Instant(50L), new Instant(150L));
  IntervalWindow windowWithoutData = new IntervalWindow(new Instant(75L), new Instant(175L));

  // A Collection is an Iterable and the maps are immutable, hence the raw casts below.
  @SuppressWarnings({ "unchecked", "rawtypes" })
  final Map<IntervalWindow, WindowedValue<Map<String, Iterable<Long>>>> valuesByWindow =
      ImmutableMap.<IntervalWindow, WindowedValue<Map<String, Iterable<Long>>>>builder()
          .put(
              windowA,
              WindowedValue.of(
                  (Map)
                      ImmutableListMultimap.<String, Long>builder()
                          .put("foo", 0L)
                          .put("foo", 2L)
                          .put("bar", -1L)
                          .build()
                          .asMap(),
                  new Instant(7),
                  windowA,
                  PaneInfo.NO_FIRING))
          .put(
              windowB,
              WindowedValue.of(
                  (Map)
                      ImmutableListMultimap.<String, Long>builder()
                          .put("bar", -1L)
                          .put("baz", 1L)
                          .put("baz", 3L)
                          .build()
                          .asMap(),
                  new Instant(53L),
                  windowB,
                  PaneInfo.NO_FIRING))
          .build();

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  Coder<Map<String, Iterable<Long>>> mapCoder =
      MapCoder.of(keyCoder, IterableCoder.of(VarLongCoder.of()));

  final PCollectionView<Map<String, Iterable<Long>>> view =
      Pipeline.create()
          .apply(Create.empty(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())))
          .apply(Window.into(FixedWindows.of(Duration.millis(100L))))
          .apply(View.asMultimap());

  IsmRecordCoder<WindowedValue<Map<String, Iterable<Long>>>> recordCoder =
      IsmRecordCoder.of(
          1,
          0,
          ImmutableList.<Coder<?>>of(INTERVAL_WINDOW_CODER),
          WindowedValue.getFullCoder(mapCoder, INTERVAL_WINDOW_CODER));

  final Source source = initInputFile(fromValues(valuesByWindow.values()), recordCoder);
  final IsmSideInputReader reader = sideInputReader(view.getTagInternal().getId(), source);

  List<Callable<Map<BoundedWindow, Map<String, Iterable<Long>>>>> tasks = new ArrayList<>();
  for (int taskIndex = 0; taskIndex < NUM_THREADS; taskIndex++) {
    tasks.add(
        () -> {
          // Hold strong references to the returned values so that the logical reference cache
          // is not cleared while this test runs.
          Map<String, Iterable<Long>> fromWindowA = reader.get(view, windowA);
          assertEquals(valuesByWindow.get(windowA).getValue(), fromWindowA);
          // A second lookup must yield the identical instance, which shows it was cached.
          assertSame(fromWindowA, reader.get(view, windowA));

          Map<String, Iterable<Long>> fromWindowB = reader.get(view, windowB);
          assertEquals(valuesByWindow.get(windowB).getValue(), fromWindowB);
          // Same caching check for the second window.
          assertSame(fromWindowB, reader.get(view, windowB));

          Map<String, Iterable<Long>> fromEmptyWindow = reader.get(view, windowWithoutData);
          assertThat(fromEmptyWindow.keySet(), empty());

          return ImmutableMap.<BoundedWindow, Map<String, Iterable<Long>>>builder()
              .put(windowA, fromWindowA)
              .put(windowB, fromWindowB)
              .put(windowWithoutData, fromEmptyWindow)
              .build();
        });
  }

  List<Future<Map<BoundedWindow, Map<String, Iterable<Long>>>>> futures =
      pipelineOptions.getExecutorService().invokeAll(tasks);
  Map<BoundedWindow, Map<String, Iterable<Long>>> reference = futures.get(0).get();

  // Every task must have observed the same instances as the first one.
  for (Future<Map<BoundedWindow, Map<String, Iterable<Long>>>> future : futures) {
    Map<BoundedWindow, Map<String, Iterable<Long>>> perTask = future.get();
    assertEquals(reference, perTask);
    for (Map.Entry<BoundedWindow, Map<String, Iterable<Long>>> windowValue :
        perTask.entrySet()) {
      assertSame(reference.get(windowValue.getKey()), windowValue.getValue());
    }
  }
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class IsmSideInputReaderTest method testSingletonInWindow.
/**
 * Spreads three windowed singleton values over two ISM files and checks, from
 * {@code NUM_THREADS} concurrently executed tasks, that the reader returns each stored value for
 * its window, returns the same cached instance on repeated lookups, and falls back to the view's
 * default value for a window that has no record.
 */
@Test
public void testSingletonInWindow() throws Exception {
  Coder<WindowedValue<Long>> windowedLongCoder =
      WindowedValue.getFullCoder(VarLongCoder.of(), INTERVAL_WINDOW_CODER);
  IsmRecordCoder<WindowedValue<Long>> ismCoder =
      IsmRecordCoder.of(
          1, 0, ImmutableList.<Coder<?>>of(INTERVAL_WINDOW_CODER), windowedLongCoder);

  final List<WindowedValue<Long>> storedValues =
      Arrays.asList(
          valueInIntervalWindow(12, 0),
          valueInIntervalWindow(17, 10),
          valueInIntervalWindow(28, 20));
  final Long fallback = 42L;

  final PCollectionView<Long> view =
      Pipeline.create()
          .apply(Create.empty(VarLongCoder.of()))
          .apply(Window.into(FixedWindows.of(Duration.millis(1))))
          .apply(View.<Long>asSingleton().withDefaultValue(fallback));

  // The first record goes into one file, the remaining two into another.
  Source sourceA = initInputFile(fromValues(storedValues).subList(0, 1), ismCoder);
  Source sourceB = initInputFile(fromValues(storedValues).subList(1, 3), ismCoder);
  final IsmSideInputReader reader =
      sideInputReader(view.getTagInternal().getId(), sourceA, sourceB);

  List<Callable<Map<BoundedWindow, Long>>> tasks = new ArrayList<>();
  for (int taskIndex = 0; taskIndex < NUM_THREADS; taskIndex++) {
    tasks.add(
        () -> {
          Map<BoundedWindow, Long> seen = new HashMap<>();
          for (WindowedValue<Long> stored : storedValues) {
            // Hold a strong reference to the returned value so that the logical reference
            // cache is not cleared while this test runs.
            Long fetched = reader.get(view, windowOf(stored));
            assertEquals(stored.getValue(), fetched);
            // A second lookup must yield the identical instance, which shows it was cached.
            assertSame(fetched, reader.get(view, windowOf(stored)));
            seen.put(windowOf(stored), fetched);
          }
          // A window without any record must produce the default value.
          assertEquals(fallback, reader.get(view, intervalWindow(30)));
          return seen;
        });
  }

  List<Future<Map<BoundedWindow, Long>>> futures =
      pipelineOptions.getExecutorService().invokeAll(tasks);
  Map<BoundedWindow, Long> reference = futures.get(0).get();

  // Every task must have observed the same instances as the first one.
  for (Future<Map<BoundedWindow, Long>> future : futures) {
    Map<BoundedWindow, Long> perTask = future.get();
    assertEquals(reference, perTask);
    for (Map.Entry<BoundedWindow, Long> windowValue : perTask.entrySet()) {
      assertSame(reference.get(windowValue.getKey()), windowValue.getValue());
    }
  }
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class IsmSideInputReaderTest method testSingletonMapInWindow.
/**
 * Stores one map value per window in a single ISM file and checks, from {@code NUM_THREADS}
 * concurrently executed tasks, that the reader returns the stored map for each populated window,
 * an empty map for a window without data, and the same cached instance on repeated lookups.
 */
@Test
public void testSingletonMapInWindow() throws Exception {
  IntervalWindow windowA = new IntervalWindow(new Instant(0L), new Instant(100L));
  IntervalWindow windowB = new IntervalWindow(new Instant(50L), new Instant(150L));
  IntervalWindow windowWithoutData = new IntervalWindow(new Instant(75L), new Instant(175L));

  final Map<IntervalWindow, WindowedValue<Map<String, Long>>> valuesByWindow =
      ImmutableMap.<IntervalWindow, WindowedValue<Map<String, Long>>>builder()
          .put(
              windowA,
              WindowedValue.of(
                  ImmutableMap.<String, Long>builder().put("foo", 0L).put("bar", -1L).build(),
                  new Instant(7),
                  windowA,
                  PaneInfo.NO_FIRING))
          .put(
              windowB,
              WindowedValue.of(
                  ImmutableMap.<String, Long>builder().put("bar", -1L).put("baz", 1L).build(),
                  new Instant(53L),
                  windowB,
                  PaneInfo.NO_FIRING))
          .build();

  Coder<Map<String, Long>> mapCoder = MapCoder.of(StringUtf8Coder.of(), VarLongCoder.of());

  final PCollectionView<Map<String, Long>> view =
      Pipeline.create()
          .apply(Create.empty(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())))
          .apply(
              Window.into(
                  SlidingWindows.of(Duration.millis(100L)).every(Duration.millis(50L))))
          .apply(View.asMap());

  IsmRecordCoder<WindowedValue<Map<String, Long>>> recordCoder =
      IsmRecordCoder.of(
          1,
          0,
          ImmutableList.<Coder<?>>of(INTERVAL_WINDOW_CODER),
          WindowedValue.getFullCoder(mapCoder, INTERVAL_WINDOW_CODER));

  final Source source = initInputFile(fromValues(valuesByWindow.values()), recordCoder);
  final IsmSideInputReader reader = sideInputReader(view.getTagInternal().getId(), source);

  List<Callable<Map<BoundedWindow, Map<String, Long>>>> tasks = new ArrayList<>();
  for (int taskIndex = 0; taskIndex < NUM_THREADS; taskIndex++) {
    tasks.add(
        () -> {
          // Hold strong references to the returned values so that the logical reference cache
          // is not cleared while this test runs.
          Map<String, Long> fromWindowA = reader.get(view, windowA);
          assertEquals(valuesByWindow.get(windowA).getValue(), fromWindowA);
          // A second lookup must yield the identical instance, which shows it was cached.
          assertSame(fromWindowA, reader.get(view, windowA));

          Map<String, Long> fromWindowB = reader.get(view, windowB);
          assertEquals(valuesByWindow.get(windowB).getValue(), fromWindowB);
          // Same caching check for the second window.
          assertSame(fromWindowB, reader.get(view, windowB));

          Map<String, Long> fromEmptyWindow = reader.get(view, windowWithoutData);
          assertThat(fromEmptyWindow.keySet(), empty());

          return ImmutableMap.<BoundedWindow, Map<String, Long>>builder()
              .put(windowA, fromWindowA)
              .put(windowB, fromWindowB)
              .put(windowWithoutData, fromEmptyWindow)
              .build();
        });
  }

  List<Future<Map<BoundedWindow, Map<String, Long>>>> futures =
      pipelineOptions.getExecutorService().invokeAll(tasks);

  // Every task must have observed the same instances as the first one.
  Map<BoundedWindow, Map<String, Long>> reference = futures.get(0).get();
  for (Future<Map<BoundedWindow, Map<String, Long>>> future : futures) {
    Map<BoundedWindow, Map<String, Long>> perTask = future.get();
    assertEquals(reference, perTask);
    for (Map.Entry<BoundedWindow, Map<String, Long>> windowValue : perTask.entrySet()) {
      assertSame(reference.get(windowValue.getKey()), windowValue.getValue());
    }
  }
}
Aggregations