Search in sources :

Example 11 with IsmRecord

use of org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord in project beam by apache.

the class BatchViewOverridesTest method testToIsmRecordForMapLikeDoFnWithoutUniqueKeysThrowsException.

/**
 * Feeds {@code ToIsmRecordForMapLikeDoFn} (constructed with its last argument set to
 * {@code true}) two values that share both key {@code 1L} and window, and expects the bundle
 * to fail with an {@link IllegalStateException} complaining about non-unique keys.
 */
@Test
public void testToIsmRecordForMapLikeDoFnWithoutUniqueKeysThrowsException() throws Exception {
    TupleTag<KV<Integer, KV<IntervalWindow, Long>>> sizeTag = new TupleTag<>();
    TupleTag<KV<Integer, KV<IntervalWindow, Long>>> entrySetTag = new TupleTag<>();
    Coder<Long> keyCoder = VarLongCoder.of();
    Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
    IsmRecordCoder<WindowedValue<Long>> ismCoder =
        IsmRecordCoder.of(
            1,
            2,
            ImmutableList.of(
                MetadataKeyCoder.of(keyCoder),
                IntervalWindow.getCoder(),
                BigEndianLongCoder.of()),
            FullWindowedValueCoder.of(VarLongCoder.of(), windowCoder));

    DoFnTester<
            KV<Integer, Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>>,
            IsmRecord<WindowedValue<Long>>>
        tester =
            DoFnTester.of(
                new BatchViewOverrides.BatchViewAsMultimap.ToIsmRecordForMapLikeDoFn<>(
                    sizeTag, entrySetTag, windowCoder, keyCoder, ismCoder, true));

    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));

    KV<KV<Long, IntervalWindow>, WindowedValue<Long>> firstEntry =
        KV.of(
            KV.of(1L, window),
            WindowedValue.of(110L, new Instant(1), window, PaneInfo.NO_FIRING));
    // Same key and same window as firstEntry: this is the duplicate that must be rejected.
    KV<KV<Long, IntervalWindow>, WindowedValue<Long>> duplicateEntry =
        KV.of(
            KV.of(1L, window),
            WindowedValue.of(111L, new Instant(2), window, PaneInfo.NO_FIRING));

    Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>> entries =
        ImmutableList.of(firstEntry, duplicateEntry);
    Iterable<KV<Integer, Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>>> bundle =
        ImmutableList.of(KV.of(1, entries));

    thrown.expect(IllegalStateException.class);
    thrown.expectMessage("Unique keys are expected but found key");
    tester.processBundle(bundle);
}
Also used : Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) IsmRecord(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord) KV(org.apache.beam.sdk.values.KV) WindowedValue(org.apache.beam.sdk.util.WindowedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 12 with IsmRecord

use of org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord in project beam by apache.

the class BatchViewOverridesTest method testToMapDoFn.

/**
 * Runs {@code ToMapDoFn} over two keyed groups covering three windows and checks that one map
 * per window is emitted, in processing order, with the expected key/value pairs.
 */
@Test
public void testToMapDoFn() throws Exception {
    Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
    DoFnTester<
            KV<Integer, Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>>>,
            IsmRecord<WindowedValue<TransformedMap<Long, WindowedValue<Long>, Long>>>>
        tester =
            DoFnTester.of(
                new BatchViewOverrides.BatchViewAsMap.ToMapDoFn<Long, Long, IntervalWindow>(
                    windowCoder));

    IntervalWindow windowA = new IntervalWindow(new Instant(0), new Instant(10));
    IntervalWindow windowB = new IntervalWindow(new Instant(10), new Instant(20));
    IntervalWindow windowC = new IntervalWindow(new Instant(20), new Instant(30));

    // First group: two entries in windowA followed by two entries in windowB.
    Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>> firstGroup =
        ImmutableList.of(
            KV.of(
                windowA,
                WindowedValue.of(KV.of(1L, 11L), new Instant(3), windowA, PaneInfo.NO_FIRING)),
            KV.of(
                windowA,
                WindowedValue.of(KV.of(2L, 21L), new Instant(7), windowA, PaneInfo.NO_FIRING)),
            KV.of(
                windowB,
                WindowedValue.of(KV.of(2L, 21L), new Instant(13), windowB, PaneInfo.NO_FIRING)),
            KV.of(
                windowB,
                WindowedValue.of(KV.of(3L, 31L), new Instant(15), windowB, PaneInfo.NO_FIRING)));
    // Second group: a single entry in windowC.
    Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>> secondGroup =
        ImmutableList.of(
            KV.of(
                windowC,
                WindowedValue.of(KV.of(4L, 41L), new Instant(25), windowC, PaneInfo.NO_FIRING)));

    Iterable<KV<Integer, Iterable<KV<IntervalWindow, WindowedValue<KV<Long, Long>>>>>> bundle =
        ImmutableList.of(KV.of(1, firstGroup), KV.of(2, secondGroup));

    // The order of the output elements is important relative to processing order
    List<IsmRecord<WindowedValue<TransformedMap<Long, WindowedValue<Long>, Long>>>> records =
        tester.processBundle(bundle);
    assertEquals(3, records.size());

    Map<Long, Long> mapForWindowA = records.get(0).getValue().getValue();
    assertEquals(2, mapForWindowA.size());
    assertEquals(ImmutableMap.of(1L, 11L, 2L, 21L), mapForWindowA);

    Map<Long, Long> mapForWindowB = records.get(1).getValue().getValue();
    assertEquals(2, mapForWindowB.size());
    assertEquals(ImmutableMap.of(2L, 21L, 3L, 31L), mapForWindowB);

    Map<Long, Long> mapForWindowC = records.get(2).getValue().getValue();
    assertEquals(1, mapForWindowC.size());
    assertEquals(ImmutableMap.of(4L, 41L), mapForWindowC);
}
Also used : Instant(org.joda.time.Instant) IsmRecord(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord) KV(org.apache.beam.sdk.values.KV) TransformedMap(org.apache.beam.runners.dataflow.BatchViewOverrides.TransformedMap) WindowedValue(org.apache.beam.sdk.util.WindowedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 13 with IsmRecord

use of org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord in project beam by apache.

the class BatchViewOverridesTest method testToIsmMetadataRecordForKeyDoFn.

/**
 * Runs {@code ToIsmMetadataRecordForKeyDoFn} over two keyed groups of (window, value) pairs
 * and checks that the emitted metadata records carry the metadata key, the window, an index
 * that restarts at {@code 1L} whenever the window changes, and the VarLong-encoded value.
 */
@Test
public void testToIsmMetadataRecordForKeyDoFn() throws Exception {
    Coder<Long> keyCoder = VarLongCoder.of();
    Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
    IsmRecordCoder<WindowedValue<Long>> ismCoder =
        IsmRecordCoder.of(
            1,
            2,
            ImmutableList.of(
                MetadataKeyCoder.of(keyCoder),
                IntervalWindow.getCoder(),
                BigEndianLongCoder.of()),
            FullWindowedValueCoder.of(VarLongCoder.of(), windowCoder));

    DoFnTester<KV<Integer, Iterable<KV<IntervalWindow, Long>>>, IsmRecord<WindowedValue<Long>>>
        tester =
            DoFnTester.of(
                new BatchViewOverrides.BatchViewAsMultimap.ToIsmMetadataRecordForKeyDoFn<
                    Long, Long, IntervalWindow>(keyCoder, windowCoder));

    IntervalWindow windowA = new IntervalWindow(new Instant(0), new Instant(10));
    IntervalWindow windowB = new IntervalWindow(new Instant(10), new Instant(20));
    IntervalWindow windowC = new IntervalWindow(new Instant(20), new Instant(30));

    Iterable<KV<IntervalWindow, Long>> firstGroup =
        ImmutableList.of(
            KV.of(windowA, 2L),
            // same window as the previous entry
            KV.of(windowA, 3L),
            // different window from the previous entry
            KV.of(windowB, 3L));
    int secondGroupKey = ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB));
    Iterable<KV<IntervalWindow, Long>> secondGroup = ImmutableList.of(KV.of(windowC, 3L));

    Iterable<KV<Integer, Iterable<KV<IntervalWindow, Long>>>> bundle =
        ImmutableList.of(KV.of(1, firstGroup), KV.of(secondGroupKey, secondGroup));

    // The order of the output elements is important relative to processing order
    List<IsmRecord<WindowedValue<Long>>> records = tester.processBundle(bundle);
    assertThat(
        records,
        contains(
            IsmRecord.<WindowedValue<Long>>meta(
                ImmutableList.of(IsmFormat.getMetadataKey(), windowA, 1L),
                CoderUtils.encodeToByteArray(VarLongCoder.of(), 2L)),
            IsmRecord.<WindowedValue<Long>>meta(
                ImmutableList.of(IsmFormat.getMetadataKey(), windowA, 2L),
                CoderUtils.encodeToByteArray(VarLongCoder.of(), 3L)),
            IsmRecord.<WindowedValue<Long>>meta(
                ImmutableList.of(IsmFormat.getMetadataKey(), windowB, 1L),
                CoderUtils.encodeToByteArray(VarLongCoder.of(), 3L)),
            IsmRecord.<WindowedValue<Long>>meta(
                ImmutableList.of(IsmFormat.getMetadataKey(), windowC, 1L),
                CoderUtils.encodeToByteArray(VarLongCoder.of(), 3L))));
}
Also used : Instant(org.joda.time.Instant) IsmRecord(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord) KV(org.apache.beam.sdk.values.KV) WindowedValue(org.apache.beam.sdk.util.WindowedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 14 with IsmRecord

use of org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord in project beam by apache.

the class IsmReaderFactory method createImpl.

/**
 * Builds (or fetches from cache) the reader for the Ism file named by {@code spec}.
 *
 * <p>The file name is read from {@code spec} under {@code WorkerPropertyNames.FILENAME}. The
 * arguments are then validated in order: {@code coder} must be a {@link WindowedValueCoder},
 * its value coder must be an {@link IsmRecordCoder}, and {@code executionContext} must be a
 * {@code BatchModeExecutionContext}. {@code options} and {@code operationContext} are not
 * used by this method.
 *
 * @return the reader obtained from the execution context's logical reference cache
 * @throws IllegalArgumentException if any of the three type checks above fails
 */
<V> NativeReader<?> createImpl(
    CloudObject spec,
    Coder<?> coder,
    PipelineOptions options,
    DataflowExecutionContext executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
    final String filename = getString(spec, WorkerPropertyNames.FILENAME);
    final ResourceId resourceId = FileSystems.matchNewResource(filename, false);

    checkArgument(
        coder instanceof WindowedValueCoder,
        "%s only supports using %s but got %s.",
        IsmReader.class,
        WindowedValueCoder.class,
        coder);
    @SuppressWarnings("unchecked")
    WindowedValueCoder<IsmRecord<V>> windowedValueCoder =
        (WindowedValueCoder<IsmRecord<V>>) coder;

    final Coder<IsmRecord<V>> valueCoder = windowedValueCoder.getValueCoder();
    checkArgument(
        valueCoder instanceof IsmRecordCoder,
        "%s only supports using %s but got %s.",
        IsmReader.class,
        IsmRecordCoder.class,
        valueCoder);
    @SuppressWarnings("unchecked")
    final IsmRecordCoder<V> ismRecordCoder = (IsmRecordCoder<V>) valueCoder;

    checkArgument(
        executionContext instanceof BatchModeExecutionContext,
        "%s only supports using %s but got %s.",
        IsmReader.class,
        BatchModeExecutionContext.class,
        executionContext);
    final BatchModeExecutionContext batchContext = (BatchModeExecutionContext) executionContext;

    // Look the reader up in the logical reference cache, keyed by the resource's string form.
    // NOTE(review): presumably the loader below only runs on a cache miss, so that requests
    // for the same file share one IsmReaderImpl -- confirm against the cache's contract.
    return batchContext
        .<IsmReaderKey, NativeReader<?>>getLogicalReferenceCache()
        .get(
            new IsmReaderKey(resourceId.toString()),
            () ->
                new IsmReaderImpl<V>(
                    resourceId,
                    ismRecordCoder,
                    batchContext
                        .<IsmReaderImpl.IsmShardKey,
                            WeightedValue<
                                NavigableMap<RandomAccessData, WindowedValue<IsmRecord<V>>>>>
                            getDataCache()));
}
Also used : RandomAccessData(org.apache.beam.runners.dataflow.util.RandomAccessData) IsmRecord(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord) WeightedValue(org.apache.beam.sdk.util.WeightedValue) IsmRecordCoder(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecordCoder) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) WindowedValue(org.apache.beam.sdk.util.WindowedValue)

Example 15 with IsmRecord

use of org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord in project beam by apache.

the class IsmSideInputReader method getListIndexFromReaderIterators.

/**
 * Computes one value per reader iterator: the last key component of the iterator's last
 * record plus one, or {@code 0} when the iterator reports no last record. The last key
 * component is assumed to be a long.
 *
 * <p>The lookups are submitted together to {@code executorService} and the results are
 * collected in the order the futures are returned.
 *
 * @param readerIterators the iterators whose last records are inspected
 * @return the computed values, one per iterator
 * @throws IOException if a lookup fails or the calling thread is interrupted while waiting;
 *     in the latter case the thread's interrupt flag is restored first
 */
private <V> Collection<Long> getListIndexFromReaderIterators(List<IsmReader<V>.IsmPrefixReaderIterator> readerIterators) throws IOException {
    // One task per iterator; each reads the last record and derives the value from its
    // final key component, assuming it's a long.
    List<Callable<Long>> tasks = new ArrayList<>();
    for (final IsmReader<V>.IsmPrefixReaderIterator iterator : readerIterators) {
        tasks.add(() -> {
            WindowedValue<IsmRecord<V>> lastValue = iterator.getLast();
            if (lastValue == null) {
                return 0L;
            }
            IsmRecord<V> lastRecord = lastValue.getValue();
            int finalComponentIndex = lastRecord.getKeyComponents().size() - 1;
            return ((long) lastRecord.getKeyComponent(finalComponentIndex)) + 1L;
        });
    }

    try {
        List<Future<Long>> futures = executorService.invokeAll(tasks);
        List<Long> indices = new ArrayList<>(futures.size());
        for (Future<Long> future : futures) {
            indices.add(future.get());
        }
        return indices;
    } catch (InterruptedException e) {
        // Restore the interrupt flag before surfacing the failure to the caller.
        Thread.currentThread().interrupt();
        Throwables.propagateIfPossible(e.getCause(), IOException.class);
        throw new IOException(e);
    } catch (ExecutionException e) {
        // Attempt to propagate the cause if possible.
        Throwables.propagateIfPossible(e.getCause(), IOException.class);
        throw new IOException(e);
    }
}
Also used : ArrayList(java.util.ArrayList) IsmRecord(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord) IOException(java.io.IOException) Callable(java.util.concurrent.Callable) Future(java.util.concurrent.Future) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

IsmRecord (org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord)26 Test (org.junit.Test)17 WindowedValue (org.apache.beam.sdk.util.WindowedValue)16 ArrayList (java.util.ArrayList)12 File (java.io.File)8 KV (org.apache.beam.sdk.values.KV)8 HashMap (java.util.HashMap)7 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)7 Instant (org.joda.time.Instant)7 SortedMap (java.util.SortedMap)6 TreeMap (java.util.TreeMap)6 Callable (java.util.concurrent.Callable)6 Future (java.util.concurrent.Future)6 Source (com.google.api.services.dataflow.model.Source)5 Collection (java.util.Collection)5 Map (java.util.Map)5 RandomAccessData (org.apache.beam.runners.dataflow.util.RandomAccessData)5 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)5 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)3 TransformedMap (org.apache.beam.runners.dataflow.BatchViewOverrides.TransformedMap)2