use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class XmlSinkTest method testCreateWriter.
/**
 * An XmlWriteOperation correctly creates an XmlWriter.
 *
 * <p>The test checks that the temporary directory reported by the writer's write operation
 * shares its parent directory with the output file prefix, that the temporary directory name
 * contains {@code "temp-beam-"}, and that the writer's marshaller is non-null.
 *
 * @throws Exception if building the write operation or creating the writer fails
 */
@Test
public void testCreateWriter() throws Exception {
  XmlWriteOperation<Bird> writeOp =
      XmlIO.<Bird>write()
          .withRecordClass(Bird.class)
          .withRootElement(testRootElement)
          .to(testFilePrefix)
          .createSink()
          .createWriteOperation();
  XmlWriter<Bird> writer = writeOp.createWriter();

  Path outputPath = new File(testFilePrefix).toPath();
  Path tempPath = new File(writer.getWriteOperation().getTemporaryDirectory().toString()).toPath();

  // The temporary directory must sit next to the requested output location.
  assertThat(tempPath.getParent(), equalTo(outputPath.getParent()));
  assertThat(tempPath.getFileName().toString(), containsString("temp-beam-"));
  assertNotNull(writer.marshaller);
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class XmlSinkTest method testXmlWriter.
/**
 * An XmlWriter correctly writes objects as Xml elements with an enclosing root element.
 *
 * <p>Two {@code Bird} records are handed to {@code runTestWrite} together with the lines the
 * output is expected to contain: a {@code <birds>} root wrapping one {@code <bird>} element per
 * record.
 *
 * @throws Exception if building the writer or running the write check fails
 */
@Test
public void testXmlWriter() throws Exception {
  XmlWriteOperation<Bird> writeOp =
      XmlIO.<Bird>write()
          .to(testFilePrefix)
          .withRecordClass(Bird.class)
          .withRootElement("birds")
          .createSink()
          .createWriteOperation();
  XmlWriter<Bird> writer = writeOp.createWriter();

  List<Bird> bundle =
      Lists.newArrayList(new Bird("bemused", "robin"), new Bird("evasive", "goose"));

  // Expected output, one entry per line, laid out to mirror the XML nesting.
  List<String> lines =
      Arrays.asList(
          "<birds>",
          "<bird>",
          "<species>robin</species>",
          "<adjective>bemused</adjective>",
          "</bird>",
          "<bird>",
          "<species>goose</species>",
          "<adjective>evasive</adjective>",
          "</bird>",
          "</birds>");

  runTestWrite(writer, bundle, lines, StandardCharsets.UTF_8.name());
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class XmlSourceTest method testSplitAtFractionExhaustiveSingleByte.
/**
 * Writes the single-byte train XML fixture to a temporary file, builds a bounded XML source over
 * it, and hands that source to {@code assertSplitAtFractionExhaustive}.
 */
@Test
public void testSplitAtFractionExhaustiveSingleByte() throws Exception {
  File xmlFile = tempFolder.newFile("trainXMLSmall");
  byte[] xmlBytes = trainXMLWithAllFeaturesSingleByte.getBytes(StandardCharsets.UTF_8);
  Files.write(xmlFile.toPath(), xmlBytes);

  String xmlLocation = xmlFile.toPath().toString();
  BoundedSource<Train> trainSource =
      XmlIO.<Train>read()
          .from(xmlLocation)
          .withRootElement("trains")
          .withRecordElement("train")
          .withRecordClass(Train.class)
          .createSource();

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  assertSplitAtFractionExhaustive(trainSource, pipelineOptions);
}
use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.
the class XmlSourceTest method testSplitAtFractionExhaustiveMultiByte.
/**
 * Multi-byte counterpart of the single-byte exhaustive split test: writes the multi-byte train
 * XML fixture to a temporary file and hands a bounded XML source over it to
 * {@code assertSplitAtFractionExhaustive}. Currently disabled; see the {@code @Ignore} reason.
 */
@Test
@Ignore(
    "Multi-byte characters in XML are not supported because the parser "
        + "currently does not correctly report byte offsets")
public void testSplitAtFractionExhaustiveMultiByte() throws Exception {
  File xmlFile = tempFolder.newFile("trainXMLSmall");
  byte[] xmlBytes = trainXMLWithAllFeaturesMultiByte.getBytes(StandardCharsets.UTF_8);
  Files.write(xmlFile.toPath(), xmlBytes);

  String xmlLocation = xmlFile.toPath().toString();
  BoundedSource<Train> trainSource =
      XmlIO.<Train>read()
          .from(xmlLocation)
          .withRootElement("දුම්රියන්")
          .withRecordElement("දුම්රිය")
          .withRecordClass(Train.class)
          .createSource();

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  assertSplitAtFractionExhaustive(trainSource, pipelineOptions);
}
use of org.apache.beam.sdk.options.PipelineOptions in project DataflowJavaSDK-examples by GoogleCloudPlatform.
the class MinimalWordCount method main.
/**
 * Builds and runs a word-count pipeline: reads text lines, splits them into words, counts each
 * distinct word, formats the counts as text, and writes them out.
 */
public static void main(String[] args) {
  // PipelineOptions carries execution settings such as which runner to use. With nothing set
  // explicitly, this example runs on the DirectRunner, chosen from the class path configured
  // in its dependencies.
  PipelineOptions options = PipelineOptionsFactory.create();

  // The Pipeline is created from the options defined above.
  Pipeline pipeline = Pipeline.create(options);

  // Splits each input line on ExampleUtils.TOKENIZER_PATTERN and emits every non-empty token.
  DoFn<String, String> extractWordsFn =
      new DoFn<String, String>() {
        @ProcessElement
        public void processElement(ProcessContext context) {
          for (String token : context.element().split(ExampleUtils.TOKENIZER_PATTERN)) {
            if (token.isEmpty()) {
              continue;
            }
            context.output(token);
          }
        }
      };

  // Renders one (word, count) pair as "word: count".
  SimpleFunction<KV<String, Long>, String> formatResultFn =
      new SimpleFunction<KV<String, Long>, String>() {
        @Override
        public String apply(KV<String, Long> wordCount) {
          return wordCount.getKey() + ": " + wordCount.getValue();
        }
      };

  // Concept #1: a root transform is applied to the pipeline. Here TextIO.Read reads a set of
  // input text files and yields a PCollection with one element per line of input text. The
  // input is a public data set holding the complete works of Shakespeare.
  pipeline
      .apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
      .apply("ExtractWords", ParDo.of(extractWordsFn))
      .apply(Count.<String>perElement())
      .apply("FormatResults", MapElements.via(formatResultFn))
      .apply(TextIO.write().to("wordcounts"));

  // Execute the pipeline and block until it completes.
  pipeline.run().waitUntilFinish();
}
Aggregations