Search in sources :

Example 6 with NumberedShardedFile

use of org.apache.beam.sdk.util.NumberedShardedFile in project beam by apache.

the class GcsKmsKeyIT method testGcsWriteWithKmsKey.

/**
 * Runs a read-then-write text pipeline whose output lands under the GCP temp location, for the
 * case where --dataflowKmsKey is set on the command line. The pipeline must finish in {@code
 * DONE}, the output shards must match the expected checksum, and every output object must report
 * a KMS key name. Temporary files are not verified.
 *
 * <p>This test verifies that GCS file copies work with CMEK-enabled files.
 */
@Test
public void testGcsWriteWithKmsKey() {
    TestPipelineOptions options = TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
    assertNotNull(options.getTempRoot());
    options.setTempLocation(options.getTempRoot() + "/testGcsWriteWithKmsKey");
    GcsOptions gcsOptions = options.as(GcsOptions.class);

    // Build a timestamped output file prefix inside the GCP temp location.
    ResourceId tempDir = FileSystems.matchNewResource(gcsOptions.getGcpTempLocation(), true);
    String outputName = String.format("GcsKmsKeyIT-%tF-%<tH-%<tM-%<tS-%<tL.output", new Date());
    ResourceId outputPrefix = tempDir.resolve(outputName, StandardResolveOptions.RESOLVE_FILE);

    Pipeline p = Pipeline.create(options);
    p.apply("ReadLines", TextIO.read().from(INPUT_FILE))
        .apply("WriteLines", TextIO.write().to(outputPrefix));
    PipelineResult result = p.run();
    assertThat(result.waitUntilFinish(), equalTo(State.DONE));

    // Glob covering every numbered shard written under the prefix.
    String shardGlob = outputPrefix + "*-of-*";
    assertThat(new NumberedShardedFile(shardGlob), fileContentsHaveChecksum(EXPECTED_CHECKSUM));

    // Verify objects have KMS key set.
    try {
        MatchResult shards = Iterables.getOnlyElement(FileSystems.match(Collections.singletonList(shardGlob)));
        GcsUtil gcsUtil = gcsOptions.getGcsUtil();
        for (Metadata shard : shards.metadata()) {
            GcsPath objectPath = GcsPath.fromUri(shard.resourceId().toString());
            assertNotNull(gcsUtil.getObject(objectPath).getKmsKeyName());
        }
    } catch (IOException e) {
        // Surface I/O problems as a test failure while keeping the original cause.
        throw new AssertionError(e);
    }
}
Also used : Metadata(org.apache.beam.sdk.io.fs.MatchResult.Metadata) PipelineResult(org.apache.beam.sdk.PipelineResult) IOException(java.io.IOException) TestPipelineOptions(org.apache.beam.sdk.testing.TestPipelineOptions) MatchResult(org.apache.beam.sdk.io.fs.MatchResult) Date(java.util.Date) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) State(org.apache.beam.sdk.PipelineResult.State) NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) GcsOptions(org.apache.beam.sdk.extensions.gcp.options.GcsOptions) GcsUtil(org.apache.beam.sdk.extensions.gcp.util.GcsUtil) Test(org.junit.Test)

Example 7 with NumberedShardedFile

use of org.apache.beam.sdk.util.NumberedShardedFile in project beam by apache.

the class WordCountIT method testE2EWordCount.

/**
 * Runs WordCount with the default input and a timestamped output location under the temp root,
 * then checks the combined contents of the output shards against the expected checksum.
 */
@Test
public void testE2EWordCount() throws Exception {
    WordCountITOptions options = TestPipeline.testingPipelineOptions().as(WordCountITOptions.class);
    options.setInputFile(DEFAULT_INPUT);

    // Output goes to <tempRoot>/<timestamped run dir>/output/results.
    String runDirName = String.format("WordCountIT-%tF-%<tH-%<tM-%<tS-%<tL", new Date());
    options.setOutput(
        FileSystems.matchNewResource(options.getTempRoot(), true)
            .resolve(runDirName, StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("output", StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("results", StandardResolveOptions.RESOLVE_FILE)
            .toString());

    WordCount.runWordCount(options);

    // Glob covering every numbered shard written under the output prefix.
    String shardGlob = options.getOutput() + "*-of-*";
    assertThat(new NumberedShardedFile(shardGlob), fileContentsHaveChecksum(DEFAULT_OUTPUT_CHECKSUM));
}
Also used : NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) Date(java.util.Date) Test(org.junit.Test)

Example 8 with NumberedShardedFile

use of org.apache.beam.sdk.util.NumberedShardedFile in project beam by apache.

the class TopWikipediaSessionsIT method testE2ETopWikiPages.

/**
 * Runs TopWikipediaSessions over the default ten-file input, writing to a timestamped location
 * under the temp root, then checks the output shards against the expected checksum.
 */
@Test
public void testE2ETopWikiPages() throws Exception {
    TopWikipediaSessionsITOptions options = TestPipeline.testingPipelineOptions().as(TopWikipediaSessionsITOptions.class);
    options.setWikiInput(DEFAULT_INPUT_10_FILES);

    // Output goes to <tempRoot>/<timestamped run dir>/output/results.
    String runDirName = String.format("topwikisessions-it-%tF-%<tH-%<tM-%<tS-%<tL", new Date());
    options.setOutput(
        FileSystems.matchNewResource(options.getTempRoot(), true)
            .resolve(runDirName, StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("output", StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("results", StandardResolveOptions.RESOLVE_FILE)
            .toString());

    TopWikipediaSessions.run(options);

    // Glob covering every numbered shard written under the output prefix.
    String shardGlob = options.getOutput() + "*-of-*";
    assertThat(new NumberedShardedFile(shardGlob), fileContentsHaveChecksum(DEFAULT_OUTPUT_CHECKSUM));
}
Also used : NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) Date(java.util.Date) Test(org.junit.Test)

Example 9 with NumberedShardedFile

use of org.apache.beam.sdk.util.NumberedShardedFile in project beam by apache.

the class TextTableProviderTest method testWriteCsv.

/**
 * Declares an external text table in CSV format located in the temp folder, inserts two rows via
 * SQL, and checks that the lines read back from the written files are exactly those rows.
 */
@Test
public void testWriteCsv() throws Exception {
    File outputBase = new File(tempFolder.getRoot(), "csv-outputs");
    String outputPath = outputBase.getAbsolutePath();

    String ddl = String.format("CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s' TBLPROPERTIES '{\"format\":\"csv\"}'", SQL_CSV_SCHEMA, outputPath);
    String query = "INSERT INTO test VALUES ('hello', 42), ('goodbye', 13)";

    pipeline.apply(SqlTransform.query(query).withDdlString(ddl));
    pipeline.run();

    // Read back everything matching "<outputPath>*" through NumberedShardedFile.
    NumberedShardedFile writtenFiles = new NumberedShardedFile(outputPath + "*");
    assertThat(
        writtenFiles.readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF),
        containsInAnyOrder("hello,42", "goodbye,13"));
}
Also used : NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) File(java.io.File) Test(org.junit.Test)

Example 10 with NumberedShardedFile

use of org.apache.beam.sdk.util.NumberedShardedFile in project beam by apache.

the class JpmsIT method testE2EJpms.

/**
 * Runs WordCount with JpmsITOptions, using the default input and a timestamped output location
 * under the temp root, then checks the output shards against the expected checksum.
 */
@Test
public void testE2EJpms() {
    JpmsITOptions options = TestPipeline.testingPipelineOptions().as(JpmsITOptions.class);
    options.setInputFile(DEFAULT_INPUT);

    // Output goes to <tempRoot>/<timestamped run dir>/output/results.
    String runDirName = String.format("JpmsIT-%tF-%<tH-%<tM-%<tS-%<tL", new Date());
    options.setOutput(
        FileSystems.matchNewResource(options.getTempRoot(), true)
            .resolve(runDirName, StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("output", StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("results", StandardResolveOptions.RESOLVE_FILE)
            .toString());

    WordCount.runWordCount(options);

    // Glob covering every numbered shard written under the output prefix.
    String shardGlob = options.getOutput() + "*-of-*";
    assertThat(new NumberedShardedFile(shardGlob), fileContentsHaveChecksum(DEFAULT_OUTPUT_CHECKSUM));
}
Also used : NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) Date(java.util.Date) Test(org.junit.Test)

Aggregations

NumberedShardedFile (org.apache.beam.sdk.util.NumberedShardedFile)14 Test (org.junit.Test)13 File (java.io.File)8 Date (java.util.Date)5 ResourceId (org.apache.beam.sdk.io.fs.ResourceId)2 IOException (java.io.IOException)1 TreeMap (java.util.TreeMap)1 Pattern (java.util.regex.Pattern)1 PerWindowFiles (org.apache.beam.examples.common.WriteOneFilePerWindow.PerWindowFiles)1 Pipeline (org.apache.beam.sdk.Pipeline)1 PipelineResult (org.apache.beam.sdk.PipelineResult)1 State (org.apache.beam.sdk.PipelineResult.State)1 GcsOptions (org.apache.beam.sdk.extensions.gcp.options.GcsOptions)1 GcsUtil (org.apache.beam.sdk.extensions.gcp.util.GcsUtil)1 MatchResult (org.apache.beam.sdk.io.fs.MatchResult)1 Metadata (org.apache.beam.sdk.io.fs.MatchResult.Metadata)1 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)1 TestPipelineOptions (org.apache.beam.sdk.testing.TestPipelineOptions)1 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)1 ExplicitShardedFile (org.apache.beam.sdk.util.ExplicitShardedFile)1