Search in sources :

Example 1 with Context

Use of org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy.Context in the Apache Beam project.

From the class FileBasedSinkTest, method testCopyToOutputFiles.

/**
 * Checks that {@code copyToOutputFiles} places each input file's contents at the expected
 * output name resolved against the base output directory.
 *
 * <p>Three single-line input files are created in the temporary folder, mapped to the names the
 * sink's filename policy yields for shards 0..2 of 3, copied, and then read back.
 */
@Test
public void testCopyToOutputFiles() throws Exception {
    SimpleSink.SimpleWriteOperation writeOp = buildWriteOperation();
    ResourceId outputDirectory = writeOp.getSink().getBaseOutputDirectoryProvider().get();

    List<String> sourceNames = Arrays.asList("input-1", "input-2", "input-3");
    List<String> sourceContents = Arrays.asList("1", "2", "3");
    List<String> destinationNames = Arrays.asList("file-00-of-03.test", "file-01-of-03.test", "file-02-of-03.test");
    int shardCount = sourceNames.size();

    Map<ResourceId, ResourceId> sourceToDestination = new HashMap<>();
    List<ResourceId> expectedDestinations = new ArrayList<>();
    for (int shard = 0; shard < shardCount; shard++) {
        // Record where this shard is expected to end up.
        ResourceId expectedDestination = getBaseOutputDirectory().resolve(destinationNames.get(shard), StandardResolveOptions.RESOLVE_FILE);
        expectedDestinations.add(expectedDestination);

        // Create the input file on disk with a single line of content.
        File sourceFile = tmpFolder.newFile(sourceNames.get(shard));
        List<String> sourceLines = Collections.singletonList(sourceContents.get(shard));
        writeFile(sourceLines, sourceFile);

        // Pair the input file with the name the sink's filename policy yields for this shard.
        sourceToDestination.put(
                LocalResources.fromFile(sourceFile, false),
                writeOp.getSink().getFilenamePolicy().unwindowedFilename(outputDirectory, new Context(shard, shardCount), ""));
    }

    // Perform the copy under test.
    writeOp.copyToOutputFiles(sourceToDestination);

    // Every expected destination must now hold the matching input's content.
    for (int shard = 0; shard < expectedDestinations.size(); shard++) {
        List<String> expectedLines = Collections.singletonList(sourceContents.get(shard));
        assertFileContains(expectedLines, expectedDestinations.get(shard));
    }
}
Also used : Context(org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy.Context) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) File(java.io.File) Test(org.junit.Test)

Example 2 with Context

Use of org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy.Context in the Apache Beam project.

From the class FileBasedSinkTest, method runFinalize.

/**
 * Finalizes the given write operation over the supplied temporary files and asserts the result:
 * each output file named by the sink's filename policy exists, each temporary file no longer
 * exists, and the operation's temporary directory no longer exists.
 *
 * @param writeOp the write operation to finalize
 * @param temporaryFiles the temporary files to hand to {@code finalize}, one per shard
 */
private void runFinalize(SimpleSink.SimpleWriteOperation writeOp, List<File> temporaryFiles) throws Exception {
    int shardCount = temporaryFiles.size();

    // Wrap every temporary file in a FileResult tagged with WriteFiles.UNKNOWN_SHARDNUM.
    List<FileResult> results = new ArrayList<>();
    for (File temporaryFile : temporaryFiles) {
        results.add(new FileResult(LocalResources.fromFile(temporaryFile, false), WriteFiles.UNKNOWN_SHARDNUM, null, null));
    }

    writeOp.finalize(results);

    ResourceId outputDirectory = writeOp.getSink().getBaseOutputDirectoryProvider().get();
    for (int shard = 0; shard < shardCount; shard++) {
        ResourceId expectedOutput = writeOp.getSink().getFilenamePolicy().unwindowedFilename(outputDirectory, new Context(shard, shardCount), "");
        File outputFile = new File(expectedOutput.toString());
        // The output must exist and the temporary file at the same index must be gone.
        assertTrue(outputFile.exists());
        assertFalse(temporaryFiles.get(shard).exists());
    }

    // The temporary directory itself must have been removed.
    File tempDirectoryFile = new File(writeOp.tempDirectory.get().toString());
    assertFalse(tempDirectoryFile.exists());

    // Test that repeated requests of the temp directory return a stable result.
    assertEquals(writeOp.tempDirectory.get(), writeOp.tempDirectory.get());
}
Also used : Context(org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy.Context) FileResult(org.apache.beam.sdk.io.FileBasedSink.FileResult) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) ArrayList(java.util.ArrayList) File(java.io.File)

Aggregations

File (java.io.File)2 ArrayList (java.util.ArrayList)2 Context (org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy.Context)2 ResourceId (org.apache.beam.sdk.io.fs.ResourceId)2 HashMap (java.util.HashMap)1 FileResult (org.apache.beam.sdk.io.FileBasedSink.FileResult)1 Test (org.junit.Test)1