Use of org.apache.beam.sdk.io.mongodb.MongoDbGridFSIO.Read.BoundedGridFSSource in the Apache Beam project.
From the class MongoDBGridFSIOTest, method testSplit.
/**
 * Exercises {@code BoundedGridFSSource.split} against the local test database.
 *
 * <p>The requested bundle size is two fifths of the source's estimated size plus 1000 bytes;
 * per the original author's note this is meant to let two files fit into one bundle. The test
 * then asserts that exactly three bundles come back, that they are equivalent to the unsplit
 * reference source, that none of them is empty, and that they yield five records in total.
 *
 * @throws Exception if estimating, splitting or reading the source fails
 */
@Test
public void testSplit() throws Exception {
  final int expectedBundleCount = 3;
  final int expectedRecordCount = 5;

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  MongoDbGridFSIO.Read<String> gridFsRead =
      MongoDbGridFSIO.<String>read()
          .withUri("mongodb://localhost:" + port)
          .withDatabase(DATABASE);
  BoundedGridFSSource source = new BoundedGridFSSource(gridFsRead, null);

  // Size each bundle at 2/5 of the estimated total, with 1000 bytes of headroom.
  long estimatedTotalBytes = source.getEstimatedSizeBytes(pipelineOptions);
  long bundleSizeBytes = (estimatedTotalBytes * 2L) / 5L + 1000;
  List<? extends BoundedSource<ObjectId>> bundles =
      source.split(bundleSizeBytes, pipelineOptions);

  assertEquals(expectedBundleCount, bundles.size());
  SourceTestUtils.assertSourcesEqualReferenceSource(source, bundles, pipelineOptions);

  // Read every bundle, tallying both the records seen and the bundles that produced any.
  int populatedBundles = 0;
  int totalRecords = 0;
  for (BoundedSource<ObjectId> bundle : bundles) {
    int bundleRecords = SourceTestUtils.readFromSource(bundle, pipelineOptions).size();
    totalRecords += bundleRecords;
    populatedBundles += bundleRecords > 0 ? 1 : 0;
  }

  assertEquals(expectedBundleCount, populatedBundles);
  assertEquals(expectedRecordCount, totalRecords);
}
Aggregations