use of org.apache.beam.sdk.io.BoundedSource in project beam by apache.
the class BoundedSourceRunnerTest method testStart.
/**
 * Builds a {@code BoundedSourceRunner} from a function spec whose payload is a serialized
 * {@code CountingSource.upTo(3)}, starts it, and checks that the values 0, 1 and 2 are handed to
 * the consumer registered under the "out" key.
 */
@Test
public void testStart() throws Exception {
  // Sink that records everything the runner emits.
  List<WindowedValue<Long>> emitted = new ArrayList<>();
  Map<String, Collection<ThrowingConsumer<WindowedValue<Long>>>> consumersByOutput =
      ImmutableMap.of("out", ImmutableList.of(emitted::add));

  // The source travels inside the function spec as a packed BytesValue.
  ByteString sourceBytes =
      ByteString.copyFrom(SerializableUtils.serializeToByteArray(CountingSource.upTo(3)));
  BytesValue payload = BytesValue.newBuilder().setValue(sourceBytes).build();
  BeamFnApi.FunctionSpec spec =
      BeamFnApi.FunctionSpec.newBuilder().setData(Any.pack(payload)).build();

  BoundedSourceRunner<BoundedSource<Long>, Long> runner =
      new BoundedSourceRunner<>(PipelineOptionsFactory.create(), spec, consumersByOutput);
  runner.start();

  assertThat(
      emitted,
      contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(2L)));
}
use of org.apache.beam.sdk.io.BoundedSource in project beam by apache.
the class ProcessBundleHandlerTest method testCreatingAndProcessingSource.
/**
 * Registers a primitive transform whose function spec carries a serialized
 * {@code CountingSource.upTo(3)} and checks two ways of running it:
 *
 * <ol>
 *   <li>running the single registered start function emits 0, 1 and 2 to the existing output
 *       consumer (the deprecated path);
 *   <li>feeding a source to the single consumer registered for the input target emits that
 *       source's elements instead.
 * </ol>
 *
 * <p>No finish functions are expected to be registered.
 */
@Test
public void testCreatingAndProcessingSource() throws Exception {
  Map<String, Message> fnApiRegistry = ImmutableMap.of(LONG_CODER_SPEC_ID, LONG_CODER_SPEC);
  String primitiveTransformId = "100L";
  long outputId = 101L;

  BeamFnApi.Target inputTarget =
      BeamFnApi.Target.newBuilder()
          .setPrimitiveTransformReference("1000L")
          .setName("inputTarget")
          .build();

  // Consumer already wired to the transform's output; it records every emitted value.
  List<WindowedValue<String>> outputValues = new ArrayList<>();
  BeamFnApi.Target outputTarget =
      BeamFnApi.Target.newBuilder()
          .setPrimitiveTransformReference(primitiveTransformId)
          .setName(Long.toString(outputId))
          .build();
  Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<String>>> existingConsumers =
      ImmutableMultimap.of(outputTarget, outputValues::add);

  // Collections the handler fills in while creating consumers for the transform.
  Multimap<BeamFnApi.Target, ThrowingConsumer<WindowedValue<BoundedSource<Long>>>> newConsumers =
      HashMultimap.create();
  List<ThrowingRunnable> startFunctions = new ArrayList<>();
  List<ThrowingRunnable> finishFunctions = new ArrayList<>();

  BeamFnApi.FunctionSpec functionSpec =
      BeamFnApi.FunctionSpec.newBuilder()
          .setId("1L")
          .setUrn(JAVA_SOURCE_URN)
          .setData(
              Any.pack(
                  BytesValue.newBuilder()
                      .setValue(
                          ByteString.copyFrom(
                              SerializableUtils.serializeToByteArray(CountingSource.upTo(3))))
                      .build()))
          .build();
  BeamFnApi.PrimitiveTransform primitiveTransform =
      BeamFnApi.PrimitiveTransform.newBuilder()
          .setId(primitiveTransformId)
          .setFunctionSpec(functionSpec)
          .putInputs("input", BeamFnApi.Target.List.newBuilder().addTarget(inputTarget).build())
          .putOutputs(
              Long.toString(outputId),
              BeamFnApi.PCollection.newBuilder().setCoderReference(LONG_CODER_SPEC_ID).build())
          .build();

  ProcessBundleHandler handler =
      new ProcessBundleHandler(
          PipelineOptionsFactory.create(), fnApiRegistry::get, beamFnDataClient);
  handler.createConsumersForPrimitiveTransform(
      primitiveTransform,
      Suppliers.ofInstance("57L")::get,
      existingConsumers::get,
      newConsumers::put,
      startFunctions::add,
      finishFunctions::add);

  // This is testing a deprecated way of running sources and should be removed
  // once all source definitions are instead propagated along the input edge.
  Iterables.getOnlyElement(startFunctions).run();
  assertThat(
      outputValues,
      contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(2L)));
  outputValues.clear();

  // Check that when passing a source along as an input, the source is processed.
  // JUnit's assertEquals takes (expected, actual); keeping that order makes a failure
  // message label the two sets correctly.
  assertEquals(ImmutableSet.of(inputTarget), newConsumers.keySet());
  Iterables.getOnlyElement(newConsumers.get(inputTarget))
      .accept(valueInGlobalWindow(CountingSource.upTo(2)));
  assertThat(outputValues, contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L)));
  assertThat(finishFunctions, empty());
}
use of org.apache.beam.sdk.io.BoundedSource in project beam by apache.
the class HCatalogIOTest method testSourceEqualsSplits.
/**
 * Test of Read that splits a {@code BoundedHCatalogSource} twice: once with a desired bundle
 * size of -1, which is expected to yield exactly one source, and once with a size derived from
 * the row count. The comparison of the two results is delegated to
 * SourceTestUtils.assertSourcesEqualReferenceSource(..).
 */
@Test
@NeedsTestData
public void testSourceEqualsSplits() throws Exception {
  final int numRows = 1500;
  final int numSamples = 10;
  final long bytesPerRow = 15;
  // Requested size of each split, computed in long arithmetic.
  final long desiredBundleSize = numRows * bytesPerRow / numSamples;

  ReaderContext context = getReaderContext(getConfigPropertiesAsMap(service.getHiveConf()));
  HCatalogIO.Read spec =
      HCatalogIO.read()
          .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
          .withContext(context)
          .withTable(TEST_TABLE);
  BoundedHCatalogSource source = new BoundedHCatalogSource(spec);

  // A bundle size of -1 is expected to leave the source as a single piece.
  List<BoundedSource<HCatRecord>> unSplitSource = source.split(-1, OPTIONS);
  assertEquals(1, unSplitSource.size());

  List<BoundedSource<HCatRecord>> splits = source.split(desiredBundleSize, OPTIONS);
  assertTrue(splits.size() > 0);

  BoundedSource<HCatRecord> reference = unSplitSource.get(0);
  SourceTestUtils.assertSourcesEqualReferenceSource(reference, splits, OPTIONS);
}
use of org.apache.beam.sdk.io.BoundedSource in project beam by apache.
the class SourceTestUtils method assertSplitAtFractionConcurrent.
/**
 * Reads from a fresh reader of {@code source} on one executor task while a second task calls
 * {@code splitAtFraction(fraction)} on the same reader, then verifies the outcome.
 *
 * <p>The reader task reads {@code numItemsToReadBeforeSplitting} items, releases the splitter
 * task through a latch, reads the rest, and always closes the reader. The splitter task waits for
 * the latch before attempting the split.
 *
 * @param executor executor that runs the reader and splitter tasks
 * @param source source to create the reader from
 * @param expectedItems items passed to the verification helper as the expected content
 * @param numItemsToReadBeforeSplitting number of items the reader task reads before the splitter
 *     task is released
 * @param fraction fraction passed to {@code splitAtFraction}
 * @param options pipeline options used to create the reader and to verify the result
 * @return {@code false} if the split attempt returned {@code null}; otherwise whether the
 *     verified split result reports more than zero residual items
 * @throws Exception if either task fails or verification fails
 */
private static <T> boolean assertSplitAtFractionConcurrent(
    ExecutorService executor,
    BoundedSource<T> source,
    List<T> expectedItems,
    final int numItemsToReadBeforeSplitting,
    final double fraction,
    PipelineOptions options)
    throws Exception {
  // Closed in readerThread
  @SuppressWarnings("resource")
  final BoundedSource.BoundedReader<T> reader = source.createReader(options);
  final CountDownLatch unblockSplitter = new CountDownLatch(1);
  Future<List<T>> readerThread =
      executor.submit(
          () -> {
            try {
              List<T> items =
                  readNItemsFromUnstartedReader(reader, numItemsToReadBeforeSplitting);
              unblockSplitter.countDown();
              items.addAll(readRemainingFromReader(reader, numItemsToReadBeforeSplitting > 0));
              return items;
            } finally {
              reader.close();
            }
          });
  Future<KV<BoundedSource<T>, BoundedSource<T>>> splitterThread =
      executor.submit(
          () -> {
            unblockSplitter.await();
            BoundedSource<T> residual = reader.splitAtFraction(fraction);
            if (residual == null) {
              return null;
            }
            return KV.of(reader.getCurrentSource(), residual);
          });

  List<T> currentItems;
  try {
    currentItems = readerThread.get();
  } catch (Exception e) {
    // If the reader task failed before counting down the latch, the splitter task would wait
    // on it forever and keep an executor thread occupied. Cancel it (interrupting the wait)
    // before propagating the failure.
    splitterThread.cancel(true);
    throw e;
  }
  KV<BoundedSource<T>, BoundedSource<T>> splitSources = splitterThread.get();
  if (splitSources == null) {
    return false;
  }
  SplitAtFractionResult res =
      verifySingleSplitAtFractionResult(
          source,
          expectedItems,
          currentItems,
          splitSources.getKey(),
          splitSources.getValue(),
          numItemsToReadBeforeSplitting,
          fraction,
          options);
  return (res.numResidualItems > 0);
}
use of org.apache.beam.sdk.io.BoundedSource in project beam by apache.
the class HadoopFormatIOReadTest method testSkipKeyValueClone.
/**
 * This test validates that when the reader is instructed not to clone key-value records, the
 * records it returns are exactly the same instances as those output by the source, no matter
 * whether they are mutable or immutable. This override setting is useful to turn on when using
 * key-value translation functions, to avoid a possibly unnecessary copy.
 */
@Test
public void testSkipKeyValueClone() throws Exception {
  SerializableConfiguration conf =
      loadTestConfiguration(EmployeeInputFormat.class, Text.class, Employee.class);

  // With both skip-clone flags 'true', every record must carry the very same key and value
  // instances that the singleton translation functions return.
  List<BoundedSource<KV<Text, Employee>>> splits =
      new HadoopInputFormatBoundedSource<>(
              conf,
              WritableCoder.of(Text.class),
              AvroCoder.of(Employee.class),
              new SingletonTextFn(),
              new SingletonEmployeeFn(),
              true,
              true)
          .split(0, p.getOptions());
  for (BoundedSource<KV<Text, Employee>> split : splits) {
    List<KV<Text, Employee>> kvs = SourceTestUtils.readFromSource(split, p.getOptions());
    for (KV<Text, Employee> kv : kvs) {
      Assert.assertSame(SingletonTextFn.TEXT, kv.getKey());
      Assert.assertEquals(SingletonTextFn.TEXT, kv.getKey());
      Assert.assertSame(SingletonEmployeeFn.EMPLOYEE, kv.getValue());
      Assert.assertEquals(SingletonEmployeeFn.EMPLOYEE, kv.getValue());
    }
  }

  // With both skip-clone flags 'false', keys and values must be equal to the singletons but
  // must be different instances.
  splits =
      new HadoopInputFormatBoundedSource<>(
              conf,
              WritableCoder.of(Text.class),
              AvroCoder.of(Employee.class),
              new SingletonTextFn(),
              new SingletonEmployeeFn(),
              false,
              false)
          .split(0, p.getOptions());
  for (BoundedSource<KV<Text, Employee>> split : splits) {
    List<KV<Text, Employee>> kvs = SourceTestUtils.readFromSource(split, p.getOptions());
    for (KV<Text, Employee> kv : kvs) {
      Assert.assertNotSame(SingletonTextFn.TEXT, kv.getKey());
      Assert.assertEquals(SingletonTextFn.TEXT, kv.getKey());
      Assert.assertNotSame(SingletonEmployeeFn.EMPLOYEE, kv.getValue());
      Assert.assertEquals(SingletonEmployeeFn.EMPLOYEE, kv.getValue());
    }
  }
}
Aggregations