Search in sources :

Example 1 with NumberedShardedFile

use of org.apache.beam.sdk.util.NumberedShardedFile in project beam by apache.

the class WordCountIT method testE2EWordCount.

/**
 * Runs {@code WordCount} against {@code DEFAULT_INPUT}, writing under a timestamped directory
 * below the pipeline's temp root, and then asserts that the files matching the output prefix
 * followed by {@code *-of-*} satisfy the {@code DEFAULT_OUTPUT_CHECKSUM} matcher.
 */
@Test
public void testE2EWordCount() throws Exception {
    WordCountITOptions options = TestPipeline.testingPipelineOptions().as(WordCountITOptions.class);
    options.setInputFile(DEFAULT_INPUT);

    // Timestamped directory name (date, hour, minute, second, millisecond) for this run.
    String runDirectory = String.format("WordCountIT-%tF-%<tH-%<tM-%<tS-%<tL", new Date());
    String outputPrefix =
        FileSystems.matchNewResource(options.getTempRoot(), true)
            .resolve(runDirectory, StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("output", StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("results", StandardResolveOptions.RESOLVE_FILE)
            .toString();
    options.setOutput(outputPrefix);

    WordCount.runWordCount(options);

    // Glob over the sharded result files written under the output prefix.
    String shardGlob = options.getOutput() + "*-of-*";
    assertThat(new NumberedShardedFile(shardGlob), fileContentsHaveChecksum(DEFAULT_OUTPUT_CHECKSUM));
}
Also used : NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) Date(java.util.Date) Test(org.junit.Test)

Example 2 with NumberedShardedFile

use of org.apache.beam.sdk.util.NumberedShardedFile in project beam by apache.

the class TopWikipediaSessionsIT method testE2ETopWikiPages.

/**
 * Runs {@code TopWikipediaSessions} against {@code DEFAULT_INPUT_10_FILES}, writing under a
 * timestamped directory below the pipeline's temp root, and then asserts that the files matching
 * the output prefix followed by {@code *-of-*} satisfy the {@code DEFAULT_OUTPUT_CHECKSUM}
 * matcher.
 */
@Test
public void testE2ETopWikiPages() throws Exception {
    TopWikipediaSessionsITOptions options =
        TestPipeline.testingPipelineOptions().as(TopWikipediaSessionsITOptions.class);
    options.setWikiInput(DEFAULT_INPUT_10_FILES);

    // Timestamped directory name (date, hour, minute, second, millisecond) for this run.
    String runDirectory = String.format("topwikisessions-it-%tF-%<tH-%<tM-%<tS-%<tL", new Date());
    String outputPrefix =
        FileSystems.matchNewResource(options.getTempRoot(), true)
            .resolve(runDirectory, StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("output", StandardResolveOptions.RESOLVE_DIRECTORY)
            .resolve("results", StandardResolveOptions.RESOLVE_FILE)
            .toString();
    options.setOutput(outputPrefix);

    TopWikipediaSessions.run(options);

    // Glob over the sharded result files written under the output prefix.
    String shardGlob = options.getOutput() + "*-of-*";
    assertThat(new NumberedShardedFile(shardGlob), fileContentsHaveChecksum(DEFAULT_OUTPUT_CHECKSUM));
}
Also used : NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) Date(java.util.Date) Test(org.junit.Test)

Example 3 with NumberedShardedFile

use of org.apache.beam.sdk.util.NumberedShardedFile in project beam by apache.

the class TextTableProviderTest method testInvalidJson.

/**
 * Writes {@code INVALID_JSON_TEXT} to a file in the temp folder, queries it through a JSON text
 * table configured with a dead-letter file, and asserts that the query yields no rows while the
 * dead-letter output contains exactly the invalid text.
 */
@Test
public void testInvalidJson() throws Exception {
    File deadLetterFile = new File(tempFolder.getRoot(), "dead-letter-file");
    byte[] invalidJsonBytes = INVALID_JSON_TEXT.getBytes(Charsets.UTF_8);
    Files.write(tempFolder.newFile("test.json").toPath(), invalidJsonBytes);

    String ddlTemplate =
        "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s/*' "
            + "TBLPROPERTIES '{\"format\":\"json\", \"deadLetterFile\": \"%s\"}'";
    String ddl =
        String.format(
            ddlTemplate, SQL_JSON_SCHEMA, tempFolder.getRoot(), deadLetterFile.getAbsoluteFile());
    String query = "SELECT * FROM test";

    PCollection<Row> parsedRows = pipeline.apply(SqlTransform.query(query).withDdlString(ddl));
    PAssert.that(parsedRows).empty();
    pipeline.run();

    // Read back whatever was written under the dead-letter prefix, without retrying.
    String deadLetterGlob = deadLetterFile.getAbsoluteFile() + "*";
    assertThat(
        new NumberedShardedFile(deadLetterGlob)
            .readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF),
        containsInAnyOrder(INVALID_JSON_TEXT));
}
Also used : NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) Row(org.apache.beam.sdk.values.Row) File(java.io.File) Test(org.junit.Test)

Example 4 with NumberedShardedFile

use of org.apache.beam.sdk.util.NumberedShardedFile in project beam by apache.

the class TextTableProviderTest method testWriteLines.

/**
 * Inserts two values into a "lines"-format text table located under the temp folder and asserts
 * that the files written under that location contain exactly those two lines, in any order.
 */
@Test
public void testWriteLines() throws Exception {
    File destinationFile = new File(tempFolder.getRoot(), "lines-outputs");
    String destinationPath = destinationFile.getAbsolutePath();

    String ddl =
        String.format(
            "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s' TBLPROPERTIES '{\"format\":\"lines\"}'",
            SQL_LINES_SCHEMA,
            destinationPath);
    String query = "INSERT INTO test VALUES ('hello'), ('goodbye')";

    pipeline.apply(SqlTransform.query(query).withDdlString(ddl));
    pipeline.run();

    // Read back every file whose name starts with the destination path, without retrying.
    NumberedShardedFile writtenShards = new NumberedShardedFile(destinationPath + "*");
    assertThat(
        writtenShards.readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF),
        containsInAnyOrder("hello", "goodbye"));
}
Also used : NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) File(java.io.File) Test(org.junit.Test)

Example 5 with NumberedShardedFile

use of org.apache.beam.sdk.util.NumberedShardedFile in project beam by apache.

the class TextTableProviderTest method testWriteJson.

/**
 * Inserts one row into a "json"-format text table located under the temp folder and asserts that
 * the files written under that location contain exactly {@code JSON_TEXT}.
 */
@Test
public void testWriteJson() throws Exception {
    File destinationFile = new File(tempFolder.getRoot(), "json-outputs");
    String destinationPath = destinationFile.getAbsolutePath();

    String ddl =
        String.format(
            "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s' TBLPROPERTIES '{\"format\":\"json\"}'",
            SQL_JSON_SCHEMA,
            destinationPath);
    String query = "INSERT INTO test(name, age) VALUES ('Jack', 13)";

    pipeline.apply(SqlTransform.query(query).withDdlString(ddl));
    pipeline.run();

    // Read back every file whose name starts with the destination path, without retrying.
    NumberedShardedFile writtenShards = new NumberedShardedFile(destinationPath + "*");
    assertThat(
        writtenShards.readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF),
        containsInAnyOrder(JSON_TEXT));
}
Also used : NumberedShardedFile(org.apache.beam.sdk.util.NumberedShardedFile) File(java.io.File) Test(org.junit.Test)

Aggregations

NumberedShardedFile (org.apache.beam.sdk.util.NumberedShardedFile): 14, Test (org.junit.Test): 13, File (java.io.File): 8, Date (java.util.Date): 5, ResourceId (org.apache.beam.sdk.io.fs.ResourceId): 2, IOException (java.io.IOException): 1, TreeMap (java.util.TreeMap): 1, Pattern (java.util.regex.Pattern): 1, PerWindowFiles (org.apache.beam.examples.common.WriteOneFilePerWindow.PerWindowFiles): 1, Pipeline (org.apache.beam.sdk.Pipeline): 1, PipelineResult (org.apache.beam.sdk.PipelineResult): 1, State (org.apache.beam.sdk.PipelineResult.State): 1, GcsOptions (org.apache.beam.sdk.extensions.gcp.options.GcsOptions): 1, GcsUtil (org.apache.beam.sdk.extensions.gcp.util.GcsUtil): 1, MatchResult (org.apache.beam.sdk.io.fs.MatchResult): 1, Metadata (org.apache.beam.sdk.io.fs.MatchResult.Metadata): 1, TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 1, TestPipelineOptions (org.apache.beam.sdk.testing.TestPipelineOptions): 1, IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 1, ExplicitShardedFile (org.apache.beam.sdk.util.ExplicitShardedFile): 1