Use of org.apache.beam.sdk.io.common.TestRow.SelectNameFn in project beam by apache.
From class S3FileSystemIT, method testWriteThenRead:
@Test
public void testWriteThenRead() {
  int rows = env.options().getNumberOfRows();

  // Write test dataset to S3.
  pipelineWrite
      .apply("Generate Sequence", GenerateSequence.from(0).to(rows))
      .apply("Prepare TestRows", ParDo.of(new DeterministicallyConstructTestRowFn()))
      .apply("Prepare file rows", ParDo.of(new SelectNameFn()))
      .apply("Write to S3 file", TextIO.write().to("s3://" + s3Bucket.name + "/test"));
  pipelineWrite.run().waitUntilFinish();

  // Read test dataset from S3.
  PCollection<String> output =
      pipelineRead.apply(TextIO.read().from("s3://" + s3Bucket.name + "/test*"));

  // Verify both the element count and a combined hash of the file contents.
  PAssert.thatSingleton(output.apply("Count All", Count.globally())).isEqualTo((long) rows);
  PAssert.that(output.apply(Combine.globally(new HashingFn()).withoutDefaults()))
      .containsInAnyOrder(getExpectedHashForRowCount(rows));
  pipelineRead.run().waitUntilFinish();
}
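
The DoFns used above are small test helpers from org.apache.beam.sdk.io.common. As a minimal sketch (not the project's actual implementation), a DoFn in the style of TestRow.SelectNameFn might look like the following, assuming TestRow exposes a name() accessor:

// Hypothetical sketch of a SelectNameFn-style DoFn; the real helper lives in
// org.apache.beam.sdk.io.common.TestRow and may differ in detail.
import org.apache.beam.sdk.io.common.TestRow;
import org.apache.beam.sdk.transforms.DoFn;

public class SelectNameFnSketch extends DoFn<TestRow, String> {
  @ProcessElement
  public void processElement(ProcessContext c) {
    // Emit only the name field, so each line of the written text file
    // contains just the row's name string.
    c.output(c.element().name());
  }
}

Projecting each TestRow down to a single string is what lets the test write with plain TextIO and later verify the read side with a row count plus an order-independent content hash.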