use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class FileBasedSourceTest method testFullyReadSingleFile.
/**
 * Writes a generated string dataset to a single file, reads it back through a
 * {@code TestFileBasedSource} built from the file's path, and checks that the records read
 * equal the dataset that was written.
 */
@Test
public void testFullyReadSingleFile() throws IOException {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  // The dataset doubles as the file content and as the expected read result.
  List<String> expectedRecords = createStringDataset(3, 50);
  File inputFile = createFileWithData("file", expectedRecords);

  TestFileBasedSource singleFileSource = new TestFileBasedSource(inputFile.getPath(), 64, null);
  List<String> actualRecords = readFromSource(singleFileSource, pipelineOptions);

  assertEquals(expectedRecords, actualRecords);
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class FileBasedSourceTest method testSplitAtFraction.
/**
 * Checks, for a {@code TestFileBasedSource} covering offsets 0 through the file's length,
 * which split-at-fraction requests are expected to fail and which are expected to succeed
 * consistently.
 *
 * <p>NOTE(review): the integer argument of each assertion helper is presumably the number of
 * records read before the split is attempted -- confirm against the helper definitions.
 */
@Test
public void testSplitAtFraction() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  File inputFile = createFileWithData("file", createStringDataset(3, 100));
  Metadata fileMetadata = FileSystems.matchSingleFileSpec(inputFile.getPath());
  long endOffset = inputFile.length();
  TestFileBasedSource wholeFileSource =
      new TestFileBasedSource(fileMetadata, 1, 0, endOffset, null);

  // Shouldn't be able to split while unstarted.
  assertSplitAtFractionFails(wholeFileSource, 0, 0.7, pipelineOptions);

  assertSplitAtFractionSucceedsAndConsistent(wholeFileSource, 1, 0.7, pipelineOptions);
  assertSplitAtFractionSucceedsAndConsistent(wholeFileSource, 30, 0.7, pipelineOptions);

  assertSplitAtFractionFails(wholeFileSource, 0, 0.0, pipelineOptions);
  assertSplitAtFractionFails(wholeFileSource, 70, 0.3, pipelineOptions);
  assertSplitAtFractionFails(wholeFileSource, 100, 1.0, pipelineOptions);
  assertSplitAtFractionFails(wholeFileSource, 100, 0.99, pipelineOptions);

  assertSplitAtFractionSucceedsAndConsistent(wholeFileSource, 100, 0.995, pipelineOptions);
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class FileBasedSourceTest method testSplitAtFractionExhaustive.
/**
 * Runs {@code assertSplitAtFractionExhaustive} against a {@code TestFileBasedSource} that
 * covers offsets 0 through the length of a freshly written file.
 */
@Test
public void testSplitAtFractionExhaustive() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  // Smaller file for exhaustive testing.
  File smallFile = createFileWithData("file", createStringDataset(3, 20));
  Metadata fileMetadata = FileSystems.matchSingleFileSpec(smallFile.getPath());
  long endOffset = smallFile.length();

  TestFileBasedSource wholeFileSource =
      new TestFileBasedSource(fileMetadata, 1, 0, endOffset, null);
  assertSplitAtFractionExhaustive(wholeFileSource, pipelineOptions);
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class FileBasedSourceTest method testFullyReadFilePattern.
/**
 * Writes four files into the same directory, reads them through a
 * {@code TestFileBasedSource} built from the pattern {@code file*} in that directory, and
 * checks that the records read are exactly the contents of {@code file1}, {@code file2} and
 * {@code file3}, in any order. The content of {@code otherfile} is not part of the expected
 * result.
 */
@Test
public void testFullyReadFilePattern() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();

  List<String> data1 = createStringDataset(3, 50);
  File file1 = createFileWithData("file1", data1);
  List<String> data2 = createStringDataset(3, 50);
  createFileWithData("file2", data2);
  List<String> data3 = createStringDataset(3, 50);
  createFileWithData("file3", data3);
  // Written next to the other files but left out of the expected results below.
  List<String> data4 = createStringDataset(3, 50);
  createFileWithData("otherfile", data4);

  TestFileBasedSource source =
      new TestFileBasedSource(new File(file1.getParent(), "file*").getPath(), 64, null);

  List<String> expectedResults = new ArrayList<>();
  expectedResults.addAll(data1);
  expectedResults.addAll(data2);
  expectedResults.addAll(data3);

  // The value under test goes first and the matcher is built from the expected records, so a
  // failure message reports "Expected" and "but was" the right way round.
  assertThat(readFromSource(source, options), containsInAnyOrder(expectedResults.toArray()));
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class MinimalWordCount method main.
/**
 * Builds and runs a word-count pipeline: reads text lines, splits them into words, counts each
 * distinct word, formats every count as {@code "word: count"} and writes the result with
 * {@code TextIO.write()}.
 */
public static void main(String[] args) {
  // Default options; per the example's own notes, execution settings such as the runner are
  // configured through this object and the DirectRunner is used by default, based on the
  // class path.
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  // The pipeline is created from the options defined above.
  Pipeline pipeline = Pipeline.create(pipelineOptions);

  // Root transform: TextIO.Read yields one element per line of the matched input files
  // (a public data set holding the complete works of Shakespeare).
  pipeline
      .apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
      // Split each line on the tokenizer pattern and emit every non-empty token.
      .apply(
          "ExtractWords",
          ParDo.of(
              new DoFn<String, String>() {
                @ProcessElement
                public void processElement(ProcessContext context) {
                  String[] tokens = context.element().split(ExampleUtils.TOKENIZER_PATTERN);
                  for (String token : tokens) {
                    if (token.isEmpty()) {
                      continue;
                    }
                    context.output(token);
                  }
                }
              }))
      // Count the occurrences of each distinct word.
      .apply(Count.<String>perElement())
      // Render each (word, count) pair as a printable line.
      .apply(
          "FormatResults",
          MapElements.via(
              new SimpleFunction<KV<String, Long>, String>() {
                @Override
                public String apply(KV<String, Long> wordCount) {
                  return wordCount.getKey() + ": " + wordCount.getValue();
                }
              }))
      .apply(TextIO.write().to("wordcounts"));

  // Run the pipeline and block until it has finished.
  pipeline.run().waitUntilFinish();
}
Aggregations