Example use of org.apache.beam.sdk.util.NumberedShardedFile in the Apache Beam project.
From the class WordCountIT, method testE2EWordCount.
/**
 * Runs the WordCount example against {@code DEFAULT_INPUT} and asserts that the sharded output
 * files match {@code DEFAULT_OUTPUT_CHECKSUM}.
 */
@Test
public void testE2EWordCount() throws Exception {
  WordCountITOptions itOptions =
      TestPipeline.testingPipelineOptions().as(WordCountITOptions.class);
  itOptions.setInputFile(DEFAULT_INPUT);

  // Output goes to <tempRoot>/<timestamped run dir>/output/results; the run directory name
  // embeds the current date and time down to the millisecond.
  String tempRoot = itOptions.getTempRoot();
  String runDirName = String.format("WordCountIT-%tF-%<tH-%<tM-%<tS-%<tL", new Date());
  String outputPath =
      FileSystems.matchNewResource(tempRoot, true)
          .resolve(runDirName, StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("output", StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("results", StandardResolveOptions.RESOLVE_FILE)
          .toString();
  itOptions.setOutput(outputPath);

  WordCount.runWordCount(itOptions);

  // Glob over every "<output>*-of-*" file written by the pipeline.
  NumberedShardedFile outputShards = new NumberedShardedFile(itOptions.getOutput() + "*-of-*");
  assertThat(outputShards, fileContentsHaveChecksum(DEFAULT_OUTPUT_CHECKSUM));
}
Example use of org.apache.beam.sdk.util.NumberedShardedFile in the Apache Beam project.
From the class TopWikipediaSessionsIT, method testE2ETopWikiPages.
/**
 * Runs the TopWikipediaSessions example against {@code DEFAULT_INPUT_10_FILES} and asserts that
 * the sharded output files match {@code DEFAULT_OUTPUT_CHECKSUM}.
 */
@Test
public void testE2ETopWikiPages() throws Exception {
  TopWikipediaSessionsITOptions itOptions =
      TestPipeline.testingPipelineOptions().as(TopWikipediaSessionsITOptions.class);
  itOptions.setWikiInput(DEFAULT_INPUT_10_FILES);

  // Output goes to <tempRoot>/<timestamped run dir>/output/results; the run directory name
  // embeds the current date and time down to the millisecond.
  String tempRoot = itOptions.getTempRoot();
  String runDirName = String.format("topwikisessions-it-%tF-%<tH-%<tM-%<tS-%<tL", new Date());
  String outputPath =
      FileSystems.matchNewResource(tempRoot, true)
          .resolve(runDirName, StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("output", StandardResolveOptions.RESOLVE_DIRECTORY)
          .resolve("results", StandardResolveOptions.RESOLVE_FILE)
          .toString();
  itOptions.setOutput(outputPath);

  TopWikipediaSessions.run(itOptions);

  // Glob over every "<output>*-of-*" file written by the pipeline.
  NumberedShardedFile outputShards = new NumberedShardedFile(itOptions.getOutput() + "*-of-*");
  assertThat(outputShards, fileContentsHaveChecksum(DEFAULT_OUTPUT_CHECKSUM));
}
Example use of org.apache.beam.sdk.util.NumberedShardedFile in the Apache Beam project.
From the class TextTableProviderTest, method testInvalidJson.
/**
 * Writes {@code INVALID_JSON_TEXT} to a JSON input file, selects from a text table configured
 * with a dead-letter file, and asserts that the query yields no rows while the dead-letter
 * output contains the invalid text.
 */
@Test
public void testInvalidJson() throws Exception {
  File deadLetters = new File(tempFolder.getRoot(), "dead-letter-file");

  // Seed the temp folder with a single input file holding the invalid JSON.
  File inputFile = tempFolder.newFile("test.json");
  byte[] payload = INVALID_JSON_TEXT.getBytes(Charsets.UTF_8);
  Files.write(inputFile.toPath(), payload);

  File deadLettersAbsolute = deadLetters.getAbsoluteFile();
  String selectAll = "SELECT * FROM test";
  String createTable =
      String.format(
          "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s/*' "
              + "TBLPROPERTIES '{\"format\":\"json\", \"deadLetterFile\": \"%s\"}'",
          SQL_JSON_SCHEMA,
          tempFolder.getRoot(),
          deadLettersAbsolute);

  PCollection<Row> result =
      pipeline.apply(SqlTransform.query(selectAll).withDdlString(createTable));
  PAssert.that(result).empty();
  pipeline.run();

  // Read every file whose path starts with the dead-letter path; STOP_BACKOFF is passed as the
  // back-off policy for the read.
  NumberedShardedFile deadLetterShards = new NumberedShardedFile(deadLettersAbsolute + "*");
  assertThat(
      deadLetterShards.readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF),
      containsInAnyOrder(INVALID_JSON_TEXT));
}
Example use of org.apache.beam.sdk.util.NumberedShardedFile in the Apache Beam project.
From the class TextTableProviderTest, method testWriteLines.
/**
 * Inserts two values into a text table using the "lines" format and asserts that the files
 * written at the table location contain exactly "hello" and "goodbye", in any order.
 */
@Test
public void testWriteLines() throws Exception {
  String destinationPath = new File(tempFolder.getRoot(), "lines-outputs").getAbsolutePath();

  String insert = "INSERT INTO test VALUES ('hello'), ('goodbye')";
  String createTable =
      String.format(
          "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s' TBLPROPERTIES '{\"format\":\"lines\"}'",
          SQL_LINES_SCHEMA,
          destinationPath);

  pipeline.apply(SqlTransform.query(insert).withDdlString(createTable));
  pipeline.run();

  // Read every file whose path starts with the destination path.
  NumberedShardedFile writtenShards = new NumberedShardedFile(destinationPath + "*");
  assertThat(
      writtenShards.readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF),
      containsInAnyOrder("hello", "goodbye"));
}
Example use of org.apache.beam.sdk.util.NumberedShardedFile in the Apache Beam project.
From the class TextTableProviderTest, method testWriteJson.
/**
 * Inserts one (name, age) row into a text table using the "json" format and asserts that the
 * files written at the table location contain {@code JSON_TEXT}.
 */
@Test
public void testWriteJson() throws Exception {
  String destinationPath = new File(tempFolder.getRoot(), "json-outputs").getAbsolutePath();

  String insert = "INSERT INTO test(name, age) VALUES ('Jack', 13)";
  String createTable =
      String.format(
          "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s' TBLPROPERTIES '{\"format\":\"json\"}'",
          SQL_JSON_SCHEMA,
          destinationPath);

  pipeline.apply(SqlTransform.query(insert).withDdlString(createTable));
  pipeline.run();

  // Read every file whose path starts with the destination path.
  NumberedShardedFile writtenShards = new NumberedShardedFile(destinationPath + "*");
  assertThat(
      writtenShards.readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF),
      containsInAnyOrder(JSON_TEXT));
}
Aggregations