Search in sources:

Example 41 with PipelineOptions

Use of org.apache.beam.sdk.options.PipelineOptions in the Apache Beam project.

From the class XmlSinkTest, method testCreateWriter.

/**
   * An XmlWriteOperation correctly creates an XmlWriter.
   *
   * <p>Verifies that the writer's temporary directory is a sibling of the output path,
   * uses the "temp-beam-" naming scheme, and that the JAXB marshaller is initialized
   * eagerly by createWriter().
   */
@Test
public void testCreateWriter() throws Exception {
    // NOTE(review): removed an unused local (PipelineOptions created via
    // PipelineOptionsFactory.create()) — nothing in this test consumed it.
    XmlWriteOperation<Bird> writeOp =
        XmlIO.<Bird>write()
            .withRecordClass(Bird.class)
            .withRootElement(testRootElement)
            .to(testFilePrefix)
            .createSink()
            .createWriteOperation();
    XmlWriter<Bird> writer = writeOp.createWriter();
    Path outputPath = new File(testFilePrefix).toPath();
    Path tempPath = new File(writer.getWriteOperation().getTemporaryDirectory().toString()).toPath();
    // Temporary directory must sit next to the final output location.
    assertThat(tempPath.getParent(), equalTo(outputPath.getParent()));
    assertThat(tempPath.getFileName().toString(), containsString("temp-beam-"));
    // createWriter() must have set up the marshaller already.
    assertNotNull(writer.marshaller);
}
Also used: Path (java.nio.file.Path), PipelineOptions (org.apache.beam.sdk.options.PipelineOptions), File (java.io.File), Test (org.junit.Test)

Example 42 with PipelineOptions

Use of org.apache.beam.sdk.options.PipelineOptions in the Apache Beam project.

From the class XmlSinkTest, method testXmlWriter.

/**
   * An XmlWriter correctly writes objects as Xml elements with an enclosing root element.
   *
   * <p>Writes a two-element bundle of Bird records and checks the serialized output
   * line-by-line against the expected XML, using UTF-8 encoding.
   */
@Test
public void testXmlWriter() throws Exception {
    // NOTE(review): removed an unused local (PipelineOptions created via
    // PipelineOptionsFactory.create()) — nothing in this test consumed it.
    XmlWriteOperation<Bird> writeOp =
        XmlIO.<Bird>write()
            .to(testFilePrefix)
            .withRecordClass(Bird.class)
            .withRootElement("birds")
            .createSink()
            .createWriteOperation();
    XmlWriter<Bird> writer = writeOp.createWriter();
    List<Bird> bundle = Lists.newArrayList(new Bird("bemused", "robin"), new Bird("evasive", "goose"));
    // Expected serialized form: one <bird> element per record, wrapped in the root element.
    List<String> lines = Arrays.asList("<birds>", "<bird>", "<species>robin</species>", "<adjective>bemused</adjective>", "</bird>", "<bird>", "<species>goose</species>", "<adjective>evasive</adjective>", "</bird>", "</birds>");
    runTestWrite(writer, bundle, lines, StandardCharsets.UTF_8.name());
}
Also used: PipelineOptions (org.apache.beam.sdk.options.PipelineOptions), Matchers.containsString (org.hamcrest.Matchers.containsString), Test (org.junit.Test)

Example 43 with PipelineOptions

Use of org.apache.beam.sdk.options.PipelineOptions in the Apache Beam project.

From the class XmlSourceTest, method testSplitAtFractionExhaustiveSingleByte.

/**
 * Exhaustively verifies splitAtFraction behavior of the XML source over a small
 * single-byte-encoded input file.
 */
@Test
public void testSplitAtFractionExhaustiveSingleByte() throws Exception {
    // Materialize the test XML into a temp file the source can read.
    File xmlFile = tempFolder.newFile("trainXMLSmall");
    Files.write(xmlFile.toPath(), trainXMLWithAllFeaturesSingleByte.getBytes(StandardCharsets.UTF_8));
    BoundedSource<Train> source =
        XmlIO.<Train>read()
            .from(xmlFile.toPath().toString())
            .withRootElement("trains")
            .withRecordElement("train")
            .withRecordClass(Train.class)
            .createSource();
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    assertSplitAtFractionExhaustive(source, pipelineOptions);
}
Also used: PipelineOptions (org.apache.beam.sdk.options.PipelineOptions), File (java.io.File), Test (org.junit.Test)

Example 44 with PipelineOptions

Use of org.apache.beam.sdk.options.PipelineOptions in the Apache Beam project.

From the class XmlSourceTest, method testSplitAtFractionExhaustiveMultiByte.

/**
 * Exhaustively verifies splitAtFraction behavior of the XML source over input whose
 * element names use multi-byte characters. Currently ignored (see annotation).
 */
@Test
@Ignore("Multi-byte characters in XML are not supported because the parser " + "currently does not correctly report byte offsets")
public void testSplitAtFractionExhaustiveMultiByte() throws Exception {
    // Materialize the multi-byte test XML into a temp file the source can read.
    File xmlFile = tempFolder.newFile("trainXMLSmall");
    Files.write(xmlFile.toPath(), trainXMLWithAllFeaturesMultiByte.getBytes(StandardCharsets.UTF_8));
    BoundedSource<Train> source =
        XmlIO.<Train>read()
            .from(xmlFile.toPath().toString())
            .withRootElement("දුම්රියන්")
            .withRecordElement("දුම්රිය")
            .withRecordClass(Train.class)
            .createSource();
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    assertSplitAtFractionExhaustive(source, pipelineOptions);
}
Also used: PipelineOptions (org.apache.beam.sdk.options.PipelineOptions), File (java.io.File), Ignore (org.junit.Ignore), Test (org.junit.Test)

Example 45 with PipelineOptions

Use of org.apache.beam.sdk.options.PipelineOptions in the DataflowJavaSDK-examples project by GoogleCloudPlatform.

From the class MinimalWordCount, method main.

/**
 * Entry point: builds and runs a minimal word-count pipeline.
 *
 * <p>Reads the complete works of Shakespeare from a public GCS bucket, splits each line
 * into words, counts occurrences per word, formats each count as "word: n", and writes
 * the results to files prefixed "wordcounts". Runs with whatever runner the classpath
 * provides (DirectRunner by default) and blocks until the pipeline finishes.
 */
public static void main(String[] args) {
    // Default options; the runner is chosen from the classpath (DirectRunner here).
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    Pipeline pipeline = Pipeline.create(pipelineOptions);
    // Tokenizer: emits each non-empty word of an input line.
    DoFn<String, String> extractWordsFn = new DoFn<String, String>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            for (String token : c.element().split(ExampleUtils.TOKENIZER_PATTERN)) {
                if (!token.isEmpty()) {
                    c.output(token);
                }
            }
        }
    };
    // Formatter: renders a (word, count) pair as a single output line.
    SimpleFunction<KV<String, Long>, String> formatResultFn = new SimpleFunction<KV<String, Long>, String>() {

        @Override
        public String apply(KV<String, Long> wordCount) {
            return wordCount.getKey() + ": " + wordCount.getValue();
        }
    };
    // Read -> split into words -> count -> format -> write.
    pipeline
        .apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
        .apply("ExtractWords", ParDo.of(extractWordsFn))
        .apply(Count.<String>perElement())
        .apply("FormatResults", MapElements.via(formatResultFn))
        .apply(TextIO.write().to("wordcounts"));
    // Block until execution completes.
    pipeline.run().waitUntilFinish();
}
Also used: PipelineOptions (org.apache.beam.sdk.options.PipelineOptions), KV (org.apache.beam.sdk.values.KV), Pipeline (org.apache.beam.sdk.Pipeline)

Aggregations

PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)92 Test (org.junit.Test)79 File (java.io.File)26 ArrayList (java.util.ArrayList)16 Pipeline (org.apache.beam.sdk.Pipeline)10 Metadata (org.apache.beam.sdk.io.fs.MatchResult.Metadata)9 Path (java.nio.file.Path)6 BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString)6 SerializedPipelineOptions (org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions)5 KV (org.apache.beam.sdk.values.KV)5 Matchers.containsString (org.hamcrest.Matchers.containsString)5 Table (com.google.api.services.bigquery.model.Table)4 TableReference (com.google.api.services.bigquery.model.TableReference)4 TableRow (com.google.api.services.bigquery.model.TableRow)4 HashBasedTable (com.google.common.collect.HashBasedTable)4 BoundedToUnboundedSourceAdapter (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter)4 BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference)4 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)4 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)3 TableSchema (com.google.api.services.bigquery.model.TableSchema)3