use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class IsmSideInputReaderTest method testMultimapInWindow.
/**
 * Reads a windowed multimap side input spread over two data files and two metadata files, and
 * checks from {@code NUM_THREADS} concurrent tasks that every window yields the expected
 * contents and that the very same cached map instances are handed back on every lookup.
 */
@Test
public void testMultimapInWindow() throws Exception {
  // byte[] keys are deliberate: arrays only have identity-based equals(), so the lookups below
  // can only succeed if keys are compared structurally instead of with Java equality.
  Coder<WindowedValue<Long>> windowedLongCoder =
      WindowedValue.getFullCoder(VarLongCoder.of(), INTERVAL_WINDOW_CODER);

  final ListMultimap<byte[], WindowedValue<Long>> expectedIn10 =
      ImmutableListMultimap.<byte[], WindowedValue<Long>>builder()
          .put(new byte[] {0x00}, valueInIntervalWindow(12L, 10))
          .put(new byte[] {0x01}, valueInIntervalWindow(22L, 10))
          .put(new byte[] {0x02}, valueInIntervalWindow(32L, 10))
          .build();
  final ListMultimap<byte[], WindowedValue<Long>> expectedIn20 =
      ImmutableListMultimap.<byte[], WindowedValue<Long>>builder()
          .put(new byte[] {0x00}, valueInIntervalWindow(42L, 20))
          .put(new byte[] {0x03}, valueInIntervalWindow(52L, 20))
          .put(new byte[] {0x02}, valueInIntervalWindow(62L, 20))
          .build();
  final ListMultimap<byte[], WindowedValue<Long>> expectedIn30 =
      ImmutableListMultimap.<byte[], WindowedValue<Long>>builder()
          .put(new byte[] {0x02}, valueInIntervalWindow(72L, 30))
          .put(new byte[] {0x04}, valueInIntervalWindow(82L, 30))
          .put(new byte[] {0x05}, valueInIntervalWindow(92L, 30))
          .build();

  final PCollectionView<Map<byte[], Iterable<Long>>> view =
      Pipeline.create()
          .apply(Create.empty(KvCoder.of(ByteArrayCoder.of(), VarLongCoder.of())))
          .apply(Window.into(FixedWindows.of(Duration.millis(10))))
          .apply(View.asMultimap());

  IsmRecordCoder<WindowedValue<Long>> recordCoder =
      IsmRecordCoder.of(
          1,
          2,
          ImmutableList.of(
              MetadataKeyCoder.of(ByteArrayCoder.of()),
              INTERVAL_WINDOW_CODER,
              BigEndianLongCoder.of()),
          windowedLongCoder);

  // Gather the records of all three windows, grouped by shard.
  Multimap<Integer, IsmRecord<WindowedValue<Long>>> recordsByShard =
      forMap(recordCoder, expectedIn10);
  recordsByShard.putAll(forMap(recordCoder, expectedIn20));
  recordsByShard.putAll(forMap(recordCoder, expectedIn30));

  // Even-numbered shards go into the first data file, odd-numbered shards into the second.
  List<IsmRecord<WindowedValue<Long>>> evenShardRecords = new ArrayList<>();
  List<IsmRecord<WindowedValue<Long>>> oddShardRecords = new ArrayList<>();
  for (Map.Entry<Integer, Collection<IsmRecord<WindowedValue<Long>>>> shard :
      recordsByShard.asMap().entrySet()) {
    List<IsmRecord<WindowedValue<Long>>> destination =
        (shard.getKey() % 2 == 0) ? evenShardRecords : oddShardRecords;
    destination.addAll(shard.getValue());
  }
  // Ensure that each file will have some records.
  checkState(!evenShardRecords.isEmpty());
  checkState(!oddShardRecords.isEmpty());
  Source sourceA = initInputFile(evenShardRecords, recordCoder);
  Source sourceB = initInputFile(oddShardRecords, recordCoder);

  // Per-window key metadata: window 10 in one file, windows 20 and 30 together in another.
  List<IsmRecord<WindowedValue<Long>>> metadataFor10 =
      forMapMetadata(ByteArrayCoder.of(), expectedIn10.keySet(), intervalWindow(10));
  List<IsmRecord<WindowedValue<Long>>> metadataFor20 =
      forMapMetadata(ByteArrayCoder.of(), expectedIn20.keySet(), intervalWindow(20));
  List<IsmRecord<WindowedValue<Long>>> metadataFor30 =
      forMapMetadata(ByteArrayCoder.of(), expectedIn30.keySet(), intervalWindow(30));
  Source sourceMetaA = initInputFile(metadataFor10, recordCoder);
  Source sourceMetaB = initInputFile(concat(metadataFor20, metadataFor30), recordCoder);

  final IsmSideInputReader reader =
      sideInputReader(
          view.getTagInternal().getId(), sourceA, sourceB, sourceMetaA, sourceMetaB);

  Callable<Map<BoundedWindow, Map<byte[], Iterable<Long>>>> task =
      () -> {
        // Store a strong reference to the returned value so that the logical reference
        // cache is not cleared for this test.
        Map<byte[], Iterable<Long>> actualIn10 = reader.get(view, intervalWindow(10));
        Map<byte[], Iterable<Long>> actualIn20 = reader.get(view, intervalWindow(20));
        Map<byte[], Iterable<Long>> actualIn30 = reader.get(view, intervalWindow(30));

        verifyMap(
            Maps.transformValues(expectedIn10.asMap(), new TransformForMultimap<Long>()),
            actualIn10,
            new ComparatorForMultimap<Long>());
        verifyMap(
            Maps.transformValues(expectedIn20.asMap(), new TransformForMultimap<Long>()),
            actualIn20,
            new ComparatorForMultimap<Long>());
        verifyMap(
            Maps.transformValues(expectedIn30.asMap(), new TransformForMultimap<Long>()),
            actualIn30,
            new ComparatorForMultimap<Long>());

        // Assert that the same value reference was returned showing that it was cached.
        assertSame(actualIn10, reader.get(view, intervalWindow(10)));
        assertSame(actualIn20, reader.get(view, intervalWindow(20)));
        assertSame(actualIn30, reader.get(view, intervalWindow(30)));

        // Also verify when requesting a window that is not part of the side input
        assertEquals(Collections.EMPTY_MAP, reader.get(view, intervalWindow(40)));

        return ImmutableMap.<BoundedWindow, Map<byte[], Iterable<Long>>>of(
            intervalWindow(10), actualIn10,
            intervalWindow(20), actualIn20,
            intervalWindow(30), actualIn30);
      };

  // Submit the same check once per worker thread.
  List<Callable<Map<BoundedWindow, Map<byte[], Iterable<Long>>>>> tasks = new ArrayList<>();
  for (int worker = 0; worker < NUM_THREADS; ++worker) {
    tasks.add(task);
  }
  List<Future<Map<BoundedWindow, Map<byte[], Iterable<Long>>>>> futures =
      pipelineOptions.getExecutorService().invokeAll(tasks);

  // Assert that all threads got back the same reference
  Map<BoundedWindow, Map<byte[], Iterable<Long>>> reference = futures.get(0).get();
  for (Future<Map<BoundedWindow, Map<byte[], Iterable<Long>>>> future : futures) {
    Map<BoundedWindow, Map<byte[], Iterable<Long>>> perThread = future.get();
    assertEquals(reference, perThread);
    for (Map.Entry<BoundedWindow, Map<byte[], Iterable<Long>>> windowEntry :
        perThread.entrySet()) {
      assertSame(reference.get(windowEntry.getKey()), windowEntry.getValue());
    }
  }
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class IsmSideInputReaderTest method testSingletonMap.
/**
 * Reads a map side input stored as a single map-valued record in the global window and checks
 * from {@code NUM_THREADS} concurrent tasks that the expected contents come back and that every
 * lookup returns the very same cached instance.
 */
@Test
public void testSingletonMap() throws Exception {
  Coder<Map<String, Long>> mapCoder = MapCoder.of(StringUtf8Coder.of(), VarLongCoder.of());
  final WindowedValue<Map<String, Long>> element =
      valueInGlobalWindow(
          ImmutableMap.<String, Long>builder().put("foo", 0L).put("bar", -1L).build());

  final PCollectionView<Map<String, Long>> view =
      Pipeline.create()
          .apply(Create.empty(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())))
          .apply(View.asMap());

  IsmRecordCoder<WindowedValue<Map<String, Long>>> recordCoder =
      IsmRecordCoder.of(
          1,
          0,
          ImmutableList.<Coder<?>>of(GLOBAL_WINDOW_CODER),
          WindowedValue.getFullCoder(mapCoder, GLOBAL_WINDOW_CODER));

  final Source source = initInputFile(fromValues(Arrays.asList(element)), recordCoder);
  final IsmSideInputReader reader = sideInputReader(view.getTagInternal().getId(), source);

  Callable<Map<String, Long>> task =
      () -> {
        // Store a strong reference to the returned value so that the logical reference
        // cache is not cleared for this test.
        Map<String, Long> actual = reader.get(view, GlobalWindow.INSTANCE);
        assertEquals(element.getValue(), actual);
        // Assert that the same value reference was returned showing that it was cached.
        assertSame(actual, reader.get(view, GlobalWindow.INSTANCE));
        return actual;
      };

  // Submit the same check once per worker thread.
  List<Callable<Map<String, Long>>> tasks = new ArrayList<>();
  for (int worker = 0; worker < NUM_THREADS; ++worker) {
    tasks.add(task);
  }
  List<Future<Map<String, Long>>> futures =
      pipelineOptions.getExecutorService().invokeAll(tasks);

  // Assert that all threads got back the same reference
  Map<String, Long> reference = futures.get(0).get();
  for (Future<Map<String, Long>> future : futures) {
    assertSame(reference, future.get());
  }
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class IsmSideInputReaderTest method testMultimap.
/**
 * Reads a globally-windowed multimap side input spread over two data files plus one metadata
 * file, and checks from {@code NUM_THREADS} concurrent tasks that the expected contents come
 * back and that every lookup returns the very same cached instance.
 */
@Test
public void testMultimap() throws Exception {
  // byte[] keys are deliberate: arrays only have identity-based equals(), so the lookups below
  // can only succeed if keys are compared structurally instead of with Java equality.
  Coder<WindowedValue<Long>> windowedLongCoder =
      WindowedValue.getFullCoder(VarLongCoder.of(), GLOBAL_WINDOW_CODER);

  final ListMultimap<byte[], WindowedValue<Long>> expected =
      ImmutableListMultimap.<byte[], WindowedValue<Long>>builder()
          .put(new byte[] {0x00}, valueInGlobalWindow(12L))
          .put(new byte[] {0x01}, valueInGlobalWindow(22L))
          .put(new byte[] {0x02}, valueInGlobalWindow(32L))
          .put(new byte[] {0x03}, valueInGlobalWindow(42L))
          .put(new byte[] {0x04}, valueInGlobalWindow(52L))
          .put(new byte[] {0x05}, valueInGlobalWindow(62L))
          .build();

  final PCollectionView<Map<byte[], Iterable<Long>>> view =
      Pipeline.create()
          .apply(Create.empty(KvCoder.of(ByteArrayCoder.of(), VarLongCoder.of())))
          .apply(View.asMultimap());

  IsmRecordCoder<WindowedValue<Long>> recordCoder =
      IsmRecordCoder.of(
          1,
          2,
          ImmutableList.of(
              MetadataKeyCoder.of(ByteArrayCoder.of()),
              GLOBAL_WINDOW_CODER,
              BigEndianLongCoder.of()),
          windowedLongCoder);

  // Even-numbered shards go into the first data file, odd-numbered shards into the second.
  Multimap<Integer, IsmRecord<WindowedValue<Long>>> recordsByShard =
      forMap(recordCoder, expected);
  List<IsmRecord<WindowedValue<Long>>> evenShardRecords = new ArrayList<>();
  List<IsmRecord<WindowedValue<Long>>> oddShardRecords = new ArrayList<>();
  recordsByShard
      .asMap()
      .forEach(
          (shard, records) ->
              (shard % 2 == 0 ? evenShardRecords : oddShardRecords).addAll(records));

  // Ensure that each file will have some records.
  checkState(!evenShardRecords.isEmpty());
  checkState(!oddShardRecords.isEmpty());
  Source sourceA = initInputFile(evenShardRecords, recordCoder);
  Source sourceB = initInputFile(oddShardRecords, recordCoder);

  // Key metadata for the global window is written to its own file.
  List<IsmRecord<WindowedValue<Long>>> keyMetadata =
      forMapMetadata(ByteArrayCoder.of(), expected.keySet(), GlobalWindow.INSTANCE);
  Source sourceMeta = initInputFile(keyMetadata, recordCoder);

  final IsmSideInputReader reader =
      sideInputReader(view.getTagInternal().getId(), sourceA, sourceB, sourceMeta);

  Callable<Map<byte[], Iterable<Long>>> task =
      () -> {
        // Store a strong reference to the returned value so that the logical reference
        // cache is not cleared for this test.
        Map<byte[], Iterable<Long>> actual = reader.get(view, GlobalWindow.INSTANCE);
        verifyMap(
            Maps.transformValues(expected.asMap(), new TransformForMultimap<Long>()),
            actual,
            new ComparatorForMultimap<Long>());
        // Assert that the same value reference was returned showing that it was cached.
        assertSame(reader.get(view, GlobalWindow.INSTANCE), actual);
        return actual;
      };

  // Submit the same check once per worker thread.
  List<Callable<Map<byte[], Iterable<Long>>>> tasks = new ArrayList<>();
  for (int worker = 0; worker < NUM_THREADS; ++worker) {
    tasks.add(task);
  }
  List<Future<Map<byte[], Iterable<Long>>>> futures =
      pipelineOptions.getExecutorService().invokeAll(tasks);

  // Assert that all threads got back the same reference
  Map<byte[], Iterable<Long>> reference = futures.get(0).get();
  for (Future<Map<byte[], Iterable<Long>>> future : futures) {
    assertSame(reference, future.get());
  }
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method createReadInstruction.
/**
 * Builds a {@link ParallelInstruction} that performs a read.
 *
 * <p>The read source's spec is the {@link CloudObject} for {@code readerFactoryClass}; both the
 * source and the single output use {@code windowedStringCoder} as their codec. The output is
 * always named {@code "read_output_name"} with original name {@code "originalName"} and system
 * name {@code "systemName"}.
 *
 * @param name used as the instruction's system name and, suffixed with {@code "OriginalName"},
 *     as its original name
 * @param readerFactoryClass class from which the source spec is derived
 * @return the populated instruction
 */
static ParallelInstruction createReadInstruction(
    String name, Class<? extends ReaderFactory> readerFactoryClass) {
  // The generated Dataflow model setters return the receiver, so each object is built fluently.
  Source cloudSource =
      new Source().setSpec(CloudObject.forClass(readerFactoryClass)).setCodec(windowedStringCoder);

  InstructionOutput output =
      new InstructionOutput()
          .setName("read_output_name")
          .setCodec(windowedStringCoder)
          .setOriginalName("originalName")
          .setSystemName("systemName");

  return new ParallelInstruction()
      .setSystemName(name)
      .setOriginalName(name + "OriginalName")
      .setRead(new ReadInstruction().setSource(cloudSource))
      .setOutputs(Arrays.asList(output));
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class ConcatReaderFactoryTest method createSourcesWithInMemorySources.
/**
 * Builds a {@code "ConcatSource"} {@link Source} containing one {@code "InMemorySource"} per
 * entry of {@code allData}.
 *
 * <p>Each nested source covers its whole list (start index 0, end index equal to the list size)
 * and is encoded with {@link StringUtf8Coder}.
 *
 * @param allData the element lists, one per nested in-memory source
 * @return a source whose spec lists the nested sources in the order given
 */
Source createSourcesWithInMemorySources(List<List<String>> allData) {
  List<Map<String, Object>> nestedSources = new ArrayList<>();
  for (List<String> elements : allData) {
    // Spec describing an in-memory source over every element of this list.
    CloudObject nestedSpec = CloudObject.forClassName("InMemorySource");
    addStringList(nestedSpec, WorkerPropertyNames.ELEMENTS, elements);
    addLong(nestedSpec, WorkerPropertyNames.START_INDEX, 0L);
    addLong(nestedSpec, WorkerPropertyNames.END_INDEX, elements.size());

    CloudObject nestedEncoding =
        CloudObjects.asCloudObject(StringUtf8Coder.of(), /*sdkComponents=*/ null);

    // Each nested source is a dictionary holding its spec and its encoding.
    Map<String, Object> nestedSource = new HashMap<>();
    nestedSource.put(PropertyNames.SOURCE_SPEC, nestedSpec);
    nestedSource.put(PropertyNames.ENCODING, nestedEncoding);
    nestedSources.add(nestedSource);
  }

  CloudObject concatSpec = CloudObject.forClassName("ConcatSource");
  addList(concatSpec, WorkerPropertyNames.CONCAT_SOURCE_SOURCES, nestedSources);

  Source concatSource = new Source();
  concatSource.setSpec(concatSpec);
  return concatSource;
}
Aggregations