Search in sources :

Example 56 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class CompressedSourceTest method testGzipProgress.

/**
 * Verifies progress reporting for a reader over a gzipped file: the compressed source
 * exposes a single split point, which is counted as consumed only once the reader is
 * fully exhausted, at which point the consumed fraction reaches 1.0.
 */
@Test
public void testGzipProgress() throws IOException {
    int numRecords = 3;
    File tmpFile = tmpFolder.newFile("nonempty.gz");
    String filename = tmpFile.toPath().toString();
    // Write numRecords zero bytes, gzip-compressed; ByteSource yields one record per byte.
    writeFile(tmpFile, new byte[numRecords], CompressionMode.GZIP);
    PipelineOptions options = PipelineOptionsFactory.create();
    CompressedSource<Byte> source = CompressedSource.from(new ByteSource(filename, 1));
    try (BoundedReader<Byte> readerOrig = source.createReader(options)) {
        assertThat(readerOrig, instanceOf(CompressedReader.class));
        CompressedReader<Byte> reader = (CompressedReader<Byte>) readerOrig;
        // Before starting: nothing consumed, the single (whole-file) split point remains.
        assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
        assertEquals(0, reader.getSplitPointsConsumed());
        assertEquals(1, reader.getSplitPointsRemaining());
        // Confirm the source has exactly numRecords records. While any record remains,
        // the split point still counts as remaining, not consumed.
        for (int i = 0; i < numRecords; ++i) {
            if (i == 0) {
                assertTrue(reader.start());
            } else {
                assertTrue(reader.advance());
            }
            assertEquals(0, reader.getSplitPointsConsumed());
            assertEquals(1, reader.getSplitPointsRemaining());
        }
        assertFalse(reader.advance());
        // After exhausting the (non-empty) source: fully consumed, no split points remain.
        assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
        assertEquals(1, reader.getSplitPointsConsumed());
        assertEquals(0, reader.getSplitPointsRemaining());
    }
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) CompressedReader(org.apache.beam.sdk.io.CompressedSource.CompressedReader) Matchers.containsString(org.hamcrest.Matchers.containsString) File(java.io.File) Test(org.junit.Test)

Example 57 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class JmsIOTest method testSplitForTopic.

/**
 * A topic-backed unbounded JMS source must never be split: a pipeline may have only a
 * single subscriber per topic, since every subscriber on a topic receives every message
 * and multiple splits would therefore emit duplicates. Whatever split count is requested,
 * the actual number of splits must be 1.
 */
@Test
public void testSplitForTopic() throws Exception {
    JmsIO.Read topicRead = JmsIO.read().withTopic(TOPIC);
    JmsIO.UnboundedJmsSource source = new JmsIO.UnboundedJmsSource(topicRead);
    PipelineOptions options = PipelineOptionsFactory.create();
    // Request several splits; a topic source must collapse the request down to one.
    List<JmsIO.UnboundedJmsSource> splits = source.split(5, options);
    assertEquals(1, splits.size());
}
Also used : PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Test(org.junit.Test)

Example 58 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class XmlSinkTest method testCreateWriteOperations.

/**
 * An XML Sink correctly creates an XmlWriteOperation: the write operation's temporary
 * directory is a sibling of the output path and carries the "temp-beam-" name prefix.
 */
@Test
public void testCreateWriteOperations() {
    // NOTE(review): the original created an unused PipelineOptions local; removed,
    // since createSink()/createWriteOperation() take no options here.
    XmlSink<Bird> sink =
        XmlIO.<Bird>write()
            .to(testFilePrefix)
            .withRecordClass(Bird.class)
            .withRootElement(testRootElement)
            .createSink();
    XmlWriteOperation<Bird> writeOp = sink.createWriteOperation();
    Path outputPath = new File(testFilePrefix).toPath();
    Path tempPath = new File(writeOp.getTemporaryDirectory().toString()).toPath();
    // The temp dir lives next to the final output and is recognizably Beam-owned.
    assertThat(tempPath.getParent(), equalTo(outputPath.getParent()));
    assertThat(tempPath.getFileName().toString(), containsString("temp-beam-"));
}
Also used : Path(java.nio.file.Path) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) File(java.io.File) Test(org.junit.Test)

Example 59 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class FlinkReduceFunction method reduce.

/**
 * Combines the pre-accumulated values for each key and window and emits the final
 * outputs. Chooses the hashing combine runner only for merging window fns whose window
 * coder is not the IntervalWindow coder; all other cases use the sorting runner.
 */
@Override
public void reduce(Iterable<WindowedValue<KV<K, AccumT>>> elements, Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
    PipelineOptions options = serializedOptions.getPipelineOptions();
    FlinkSideInputReader sideInputReader = new FlinkSideInputReader(sideInputs, getRuntimeContext());
    boolean nonMerging = windowingStrategy.getWindowFn().isNonMerging();
    boolean intervalWindowCoder = windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder());
    // Equivalent to the original !nonMerging && !intervalWindowCoder test, De Morgan'd.
    AbstractFlinkCombineRunner<K, AccumT, AccumT, OutputT, W> reduceRunner =
        (nonMerging || intervalWindowCoder)
            ? new SortingFlinkCombineRunner<K, AccumT, AccumT, OutputT, W>()
            : new HashingFlinkCombineRunner<K, AccumT, AccumT, OutputT, W>();
    reduceRunner.combine(
        new AbstractFlinkCombineRunner.FinalFlinkCombiner<K, AccumT, OutputT>(combineFn),
        windowingStrategy,
        sideInputReader,
        options,
        elements,
        out);
}
Also used : SerializedPipelineOptions(org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions)

Example 60 with PipelineOptions

use of org.apache.beam.sdk.options.PipelineOptions in project beam by apache.

the class FlinkMergingNonShuffleReduceFunction method reduce.

/**
 * Runs the complete combine (add inputs, merge accumulators, extract output) for each
 * key and window over the raw input values, i.e. without a preceding shuffle-side
 * partial combine. IntervalWindow-coded windows take the sorting path; all other
 * window coders take the hashing path.
 */
@Override
public void reduce(Iterable<WindowedValue<KV<K, InputT>>> elements, Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
    PipelineOptions options = serializedOptions.getPipelineOptions();
    FlinkSideInputReader sideInputReader = new FlinkSideInputReader(sideInputs, getRuntimeContext());
    AbstractFlinkCombineRunner<K, InputT, AccumT, OutputT, W> reduceRunner;
    // Inverted form of the original condition; branch bodies swapped accordingly.
    if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
        reduceRunner = new HashingFlinkCombineRunner<>();
    } else {
        reduceRunner = new SortingFlinkCombineRunner<>();
    }
    reduceRunner.combine(
        new AbstractFlinkCombineRunner.CompleteFlinkCombiner<K, InputT, AccumT, OutputT>(combineFn),
        windowingStrategy,
        sideInputReader,
        options,
        elements,
        out);
}
Also used : SerializedPipelineOptions(org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions)

Aggregations

PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)92 Test (org.junit.Test)79 File (java.io.File)26 ArrayList (java.util.ArrayList)16 Pipeline (org.apache.beam.sdk.Pipeline)10 Metadata (org.apache.beam.sdk.io.fs.MatchResult.Metadata)9 Path (java.nio.file.Path)6 BigQueryHelpers.toJsonString (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString)6 SerializedPipelineOptions (org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions)5 KV (org.apache.beam.sdk.values.KV)5 Matchers.containsString (org.hamcrest.Matchers.containsString)5 Table (com.google.api.services.bigquery.model.Table)4 TableReference (com.google.api.services.bigquery.model.TableReference)4 TableRow (com.google.api.services.bigquery.model.TableRow)4 HashBasedTable (com.google.common.collect.HashBasedTable)4 BoundedToUnboundedSourceAdapter (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter)4 BigQueryHelpers.createTempTableReference (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference)4 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)4 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)3 TableSchema (com.google.api.services.bigquery.model.TableSchema)3