Use of org.apache.beam.sdk.util.NumberedShardedFile in the Apache Beam project.
Class GcsKmsKeyIT, method testGcsWriteWithKmsKey.
/**
 * Runs a read-then-write text pipeline whose output lands under tempLocation, with
 * --dataflowKmsKey expected to be set on the command line. Checks that the resulting output is
 * readable (checksum match) and that every matched output object reports a non-null KMS key name.
 * Temporary files are not verified.
 *
 * <p>This test verifies that GCS file copies work with CMEK-enabled files.
 */
@Test
public void testGcsWriteWithKmsKey() {
  TestPipelineOptions options =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  assertNotNull(options.getTempRoot());
  options.setTempLocation(options.getTempRoot() + "/testGcsWriteWithKmsKey");
  GcsOptions gcsOptions = options.as(GcsOptions.class);

  // Build a timestamped output prefix inside the GCP temp location.
  ResourceId tempDirectory = FileSystems.matchNewResource(gcsOptions.getGcpTempLocation(), true);
  String outputName = String.format("GcsKmsKeyIT-%tF-%<tH-%<tM-%<tS-%<tL.output", new Date());
  ResourceId outputPrefix = tempDirectory.resolve(outputName, StandardResolveOptions.RESOLVE_FILE);

  // Copy the input lines to the output prefix and wait for the pipeline to finish.
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("ReadLines", TextIO.read().from(INPUT_FILE))
      .apply("WriteLines", TextIO.write().to(outputPrefix));
  State finalState = pipeline.run().waitUntilFinish();
  assertThat(finalState, equalTo(State.DONE));

  // The written shards must be readable and match the expected checksum.
  String shardPattern = outputPrefix + "*-of-*";
  NumberedShardedFile writtenShards = new NumberedShardedFile(shardPattern);
  assertThat(writtenShards, fileContentsHaveChecksum(EXPECTED_CHECKSUM));

  // Verify objects have KMS key set.
  try {
    MatchResult matched =
        Iterables.getOnlyElement(FileSystems.match(Collections.singletonList(shardPattern)));
    GcsUtil gcsUtil = gcsOptions.getGcsUtil();
    for (Metadata shard : matched.metadata()) {
      GcsPath objectPath = GcsPath.fromUri(shard.resourceId().toString());
      String kmsKeyName = gcsUtil.getObject(objectPath).getKmsKeyName();
      assertNotNull(kmsKeyName);
    }
  } catch (IOException e) {
    // Surface I/O problems as a test failure while keeping the original cause.
    throw new AssertionError(e);
  }
}
Use of org.apache.beam.sdk.util.NumberedShardedFile in the Apache Beam project.
Class WordCountIT, method testE2EWordCount.
/**
 * Runs WordCount end to end on the default input and checks that the sharded output matches the
 * expected checksum.
 */
@Test
public void testE2EWordCount() throws Exception {
  WordCountITOptions options =
      TestPipeline.testingPipelineOptions().as(WordCountITOptions.class);
  options.setInputFile(DEFAULT_INPUT);

  // Output goes to <tempRoot>/WordCountIT-<timestamp>/output/results.
  String runDirectoryName = String.format("WordCountIT-%tF-%<tH-%<tM-%<tS-%<tL", new Date());
  String outputLocation =
      FileSystems.matchNewResource(options.getTempRoot(), true)
          .resolve(runDirectoryName, StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("output", StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("results", StandardResolveOptions.RESOLVE_FILE)
          .toString();
  options.setOutput(outputLocation);

  WordCount.runWordCount(options);

  // Match every numbered shard written under the output prefix.
  String shardPattern = options.getOutput() + "*-of-*";
  assertThat(
      new NumberedShardedFile(shardPattern), fileContentsHaveChecksum(DEFAULT_OUTPUT_CHECKSUM));
}
Use of org.apache.beam.sdk.util.NumberedShardedFile in the Apache Beam project.
Class TopWikipediaSessionsIT, method testE2ETopWikiPages.
/**
 * Runs TopWikipediaSessions end to end on the ten-file default input and checks that the sharded
 * output matches the expected checksum.
 */
@Test
public void testE2ETopWikiPages() throws Exception {
  TopWikipediaSessionsITOptions options =
      TestPipeline.testingPipelineOptions().as(TopWikipediaSessionsITOptions.class);
  options.setWikiInput(DEFAULT_INPUT_10_FILES);

  // Output goes to <tempRoot>/topwikisessions-it-<timestamp>/output/results.
  String runDirectoryName =
      String.format("topwikisessions-it-%tF-%<tH-%<tM-%<tS-%<tL", new Date());
  String outputLocation =
      FileSystems.matchNewResource(options.getTempRoot(), true)
          .resolve(runDirectoryName, StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("output", StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("results", StandardResolveOptions.RESOLVE_FILE)
          .toString();
  options.setOutput(outputLocation);

  TopWikipediaSessions.run(options);

  // Match every numbered shard written under the output prefix.
  String shardPattern = options.getOutput() + "*-of-*";
  assertThat(
      new NumberedShardedFile(shardPattern), fileContentsHaveChecksum(DEFAULT_OUTPUT_CHECKSUM));
}
Use of org.apache.beam.sdk.util.NumberedShardedFile in the Apache Beam project.
Class TextTableProviderTest, method testWriteCsv.
/**
 * Inserts two rows into a CSV-format external text table through SQL and checks that the files
 * written under the table location contain exactly those rows, in any order.
 */
@Test
public void testWriteCsv() throws Exception {
  File destinationFile = new File(tempFolder.getRoot(), "csv-outputs");
  String destinationPath = destinationFile.getAbsolutePath();

  // The table location doubles as the filename prefix of the written output files.
  String createTable =
      String.format(
          "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s' TBLPROPERTIES '{\"format\":\"csv\"}'",
          SQL_CSV_SCHEMA,
          destinationPath);
  String insertRows = "INSERT INTO test VALUES ('hello', 42), ('goodbye', 13)";

  pipeline.apply(SqlTransform.query(insertRows).withDdlString(createTable));
  pipeline.run();

  // Read back every file sharing the destination prefix; STOP_BACKOFF is passed as the back-off.
  NumberedShardedFile writtenFiles = new NumberedShardedFile(destinationPath + "*");
  assertThat(
      writtenFiles.readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF),
      containsInAnyOrder("hello,42", "goodbye,13"));
}
Use of org.apache.beam.sdk.util.NumberedShardedFile in the Apache Beam project.
Class JpmsIT, method testE2EJpms.
/**
 * Runs WordCount end to end using the JPMS integration-test options and checks that the sharded
 * output matches the expected checksum.
 */
@Test
public void testE2EJpms() {
  JpmsITOptions options = TestPipeline.testingPipelineOptions().as(JpmsITOptions.class);
  options.setInputFile(DEFAULT_INPUT);

  // Output goes to <tempRoot>/JpmsIT-<timestamp>/output/results.
  String runDirectoryName = String.format("JpmsIT-%tF-%<tH-%<tM-%<tS-%<tL", new Date());
  String outputLocation =
      FileSystems.matchNewResource(options.getTempRoot(), true)
          .resolve(runDirectoryName, StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("output", StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("results", StandardResolveOptions.RESOLVE_FILE)
          .toString();
  options.setOutput(outputLocation);

  WordCount.runWordCount(options);

  // Match every numbered shard written under the output prefix.
  String shardPattern = options.getOutput() + "*-of-*";
  assertThat(
      new NumberedShardedFile(shardPattern), fileContentsHaveChecksum(DEFAULT_OUTPUT_CHECKSUM));
}
Aggregations