use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class FileBasedSourceTest method testReadAllSplitsOfSingleFile.
/**
 * Writes a generated string dataset to a single file, splits a {@code TestFileBasedSource} over
 * that file, reads every resulting split, and verifies that the combined records equal the
 * original dataset irrespective of order.
 */
@Test
public void testReadAllSplitsOfSingleFile() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  List<String> data = createStringDataset(3, 50);
  String fileName = "file";
  File file = createFileWithData(fileName, data);
  TestFileBasedSource source = new TestFileBasedSource(file.getPath(), 16, null);

  // Hand the real options to split() instead of null, matching what is passed to the reads below.
  List<? extends BoundedSource<String>> sources = source.split(32, options);

  // Not a trivial split.
  assertTrue(sources.size() > 1);

  List<String> results = new ArrayList<>();
  for (BoundedSource<String> split : sources) {
    results.addAll(readFromSource(split, options));
  }

  // Actual value first, expected values inside the matcher, so failure messages read correctly.
  assertThat(results, containsInAnyOrder(data.toArray()));
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class CompressedSourceTest method testUnsplittable.
/**
 * Writes generated bytes to a {@code .gz} file using {@code CompressionMode.GZIP}, reads them
 * back through a {@link CompressedSource}, and checks that every {@code splitAtFraction} attempt
 * made during the read returns {@code null} and that the values read match the input.
 *
 * @throws IOException if writing the file or reading from the source fails
 */
@Test
public void testUnsplittable() throws IOException {
  String baseName = "test-input";
  File compressedFile = tmpFolder.newFile(baseName + ".gz");
  byte[] input = generateInput(10000);
  writeFile(compressedFile, input, CompressionMode.GZIP);

  CompressedSource<Byte> source =
      CompressedSource.from(new ByteSource(compressedFile.getPath(), 1));
  List<Byte> expected = Lists.newArrayList();
  for (byte i : input) {
    expected.add(i);
  }

  PipelineOptions options = PipelineOptionsFactory.create();
  List<Byte> actual = Lists.newArrayList();
  // try-with-resources: the reader is closed even when an assertion fails part-way through.
  try (BoundedReader<Byte> reader = source.createReader(options)) {
    for (boolean hasNext = reader.start(); hasNext; hasNext = reader.advance()) {
      actual.add(reader.getCurrent());
      // attempt a split every 9 elements; each attempt is expected to be refused
      if (actual.size() % 9 == 0) {
        Double fractionConsumed = reader.getFractionConsumed();
        assertNotNull(fractionConsumed);
        assertNull(reader.splitAtFraction(fractionConsumed));
      }
    }
  }

  assertEquals(expected.size(), actual.size());
  assertEquals(Sets.newHashSet(expected), Sets.newHashSet(actual));
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class MongoDBGridFSIOTest method testSplit.
/**
 * Splits a {@code BoundedGridFSSource} using a bundle size derived from its estimated size, then
 * checks the number of splits, that the splits agree with the unsplit source, that no split is
 * empty, and the total number of elements read.
 */
@Test
public void testSplit() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  MongoDbGridFSIO.Read<String> read =
      MongoDbGridFSIO.<String>read()
          .withUri("mongodb://localhost:" + port)
          .withDatabase(DATABASE);
  BoundedGridFSSource src = new BoundedGridFSSource(read, null);

  // make sure 2 files can fit in
  long estimatedSizeBytes = src.getEstimatedSizeBytes(options);
  long desiredBundleSizeBytes = (estimatedSizeBytes * 2L) / 5L + 1000;

  List<? extends BoundedSource<ObjectId>> splits = src.split(desiredBundleSizeBytes, options);
  final int expectedNbSplits = 3;
  assertEquals(expectedNbSplits, splits.size());
  SourceTestUtils.assertSourcesEqualReferenceSource(src, splits, options);

  // Read each split once, tallying how many produced data and how many elements were seen.
  int populatedSplits = 0;
  int totalElements = 0;
  for (BoundedSource<ObjectId> split : splits) {
    int elementsInSplit = SourceTestUtils.readFromSource(split, options).size();
    if (elementsInSplit != 0) {
      populatedSplits++;
    }
    totalElements += elementsInSplit;
  }

  assertEquals(expectedNbSplits, populatedSplits);
  assertEquals(5, totalElements);
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class JmsIOTest method testSplitForQueue.
/**
 * Splits a {@code JmsIO.UnboundedJmsSource} configured with a queue and checks that the number
 * of splits returned equals the number requested.
 */
@Test
public void testSplitForQueue() throws Exception {
  JmsIO.Read read = JmsIO.read().withQueue(QUEUE);
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  final int requestedSplits = 5;

  JmsIO.UnboundedJmsSource initialSource = new JmsIO.UnboundedJmsSource(read);
  List<JmsIO.UnboundedJmsSource> splits = initialSource.split(requestedSplits, pipelineOptions);

  // For a queue there are concurrent consumers by default, so the number of splits produced
  // is expected to equal the number of splits requested.
  int producedSplits = splits.size();
  assertEquals(requestedSplits, producedSplits);
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class DirectRunnerTest method testMutatingOutputWithEnforcementDisabledSucceeds.
/**
 * Tests that a {@link DoFn} that mutates an output with a good equals() succeeds in the
 * {@link DirectRunner} when immutability enforcement is disabled.
 */
@Test
public void testMutatingOutputWithEnforcementDisabledSucceeds() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class);
  // Turn off the immutability check; the mutation below must then be tolerated.
  options.as(DirectOptions.class).setEnforceImmutability(false);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(Create.of(42))
      .apply(
          ParDo.of(
              new DoFn<Integer, List<Integer>>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  // Arrays.asList is fixed-size but permits set(), so it can be mutated in place.
                  List<Integer> outputList = Arrays.asList(1, 2, 3, 4);
                  c.output(outputList);
                  // Mutate the list after it has already been output, then output it again.
                  outputList.set(0, 37);
                  c.output(outputList);
                }
              }));
  // No exception is expected from the run.
  pipeline.run();
}
Aggregations