Search in sources :

Example 16 with ResourceId

use of org.apache.beam.sdk.io.fs.ResourceId in project beam by apache.

the class FileBasedSinkTest method testGenerateOutputFilenamesWithoutExtension.

/**
 * Checks the destination filenames produced by the sink's filename policy when the sink is
 * built with an empty extension, for shard counts of 3, 1 and 0.
 */
@Test
public void testGenerateOutputFilenamesWithoutExtension() {
    ResourceId outputDir = getBaseOutputDirectory();
    FilenamePolicy filenamePolicy = new SimpleSink(outputDir, "file", "-SSSSS-of-NNNNN", "").getFilenamePolicy();

    // Three shards: one name per shard index, none carrying an extension.
    List<ResourceId> threeShardNames = Arrays.asList(
        outputDir.resolve("file-00000-of-00003", StandardResolveOptions.RESOLVE_FILE),
        outputDir.resolve("file-00001-of-00003", StandardResolveOptions.RESOLVE_FILE),
        outputDir.resolve("file-00002-of-00003", StandardResolveOptions.RESOLVE_FILE));
    assertEquals(threeShardNames, generateDestinationFilenames(outputDir, filenamePolicy, 3));

    // A single shard.
    List<ResourceId> singleShardName = Collections.singletonList(
        outputDir.resolve("file-00000-of-00001", StandardResolveOptions.RESOLVE_FILE));
    assertEquals(singleShardName, generateDestinationFilenames(outputDir, filenamePolicy, 1));

    // No shards: no filenames are expected.
    List<ResourceId> noNames = new ArrayList<>();
    assertEquals(noNames, generateDestinationFilenames(outputDir, filenamePolicy, 0));
}
Also used : ResourceId(org.apache.beam.sdk.io.fs.ResourceId) FilenamePolicy(org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy) Test(org.junit.Test)

Example 17 with ResourceId

use of org.apache.beam.sdk.io.fs.ResourceId in project beam by apache.

the class FileBasedSinkTest method testWriter.

/**
 * Writer opens the correct file, writes the header, footer, and elements in the correct
 * order, and returns the correct filename.
 *
 * <p>The writer is opened unwindowed under a fixed id; the test then checks that the temp
 * filename reported on close is that id resolved against the base temp directory, and that the
 * file holds the header, the written values, and the footer, in that order.
 *
 * @throws Exception if opening, writing to, or closing the writer fails
 */
@Test
public void testWriter() throws Exception {
    String testUid = "testId";
    ResourceId expectedTempFile = getBaseTempDirectory().resolve(testUid, StandardResolveOptions.RESOLVE_FILE);
    List<String> values = Arrays.asList("sympathetic vulture", "boresome hummingbird");
    // Expected file contents: header first, then every value in write order, then footer.
    List<String> expected = new ArrayList<>();
    expected.add(SimpleSink.SimpleWriter.HEADER);
    expected.addAll(values);
    expected.add(SimpleSink.SimpleWriter.FOOTER);
    SimpleSink.SimpleWriter writer = buildWriteOperationWithTempDir(getBaseTempDirectory()).createWriter();
    writer.openUnwindowed(testUid, -1);
    for (String value : values) {
        writer.write(value);
    }
    FileResult result = writer.close();
    assertEquals(expectedTempFile, result.getTempFilename());
    assertFileContains(expected, expectedTempFile);
}
Also used : FileResult(org.apache.beam.sdk.io.FileBasedSink.FileResult) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 18 with ResourceId

use of org.apache.beam.sdk.io.fs.ResourceId in project beam by apache.

the class FileBasedSinkTest method runFinalize.

/**
 * Runs {@code writeOp.finalize} over the given temporary files and verifies the outcome.
 *
 * <p>After finalization, each expected unwindowed output file must exist, each temporary file
 * must be gone, and the write operation's temp directory must no longer exist.
 *
 * @param writeOp the write operation to finalize
 * @param temporaryFiles the temporary files to hand to the write operation as results
 * @throws Exception if finalization fails
 */
private void runFinalize(SimpleSink.SimpleWriteOperation writeOp, List<File> temporaryFiles) throws Exception {
    int shardCount = temporaryFiles.size();

    // Wrap every temporary file in a FileResult with an unknown shard number.
    List<FileResult> results = new ArrayList<>();
    for (File tempFile : temporaryFiles) {
        ResourceId tempResource = LocalResources.fromFile(tempFile, false);
        results.add(new FileResult(tempResource, WriteFiles.UNKNOWN_SHARDNUM, null, null));
    }

    writeOp.finalize(results);

    ResourceId outputDirectory = writeOp.getSink().getBaseOutputDirectoryProvider().get();
    for (int shard = 0; shard < shardCount; shard++) {
        Context shardContext = new Context(shard, shardCount);
        ResourceId finalName = writeOp.getSink().getFilenamePolicy().unwindowedFilename(outputDirectory, shardContext, "");
        File finalFile = new File(finalName.toString());
        // The output must be present and the matching temporary file must have been removed.
        assertTrue(finalFile.exists());
        assertFalse(temporaryFiles.get(shard).exists());
    }

    File tempDirectory = new File(writeOp.tempDirectory.get().toString());
    assertFalse(tempDirectory.exists());
    // Repeated requests of the temp directory must return a stable result.
    assertEquals(writeOp.tempDirectory.get(), writeOp.tempDirectory.get());
}
Also used : Context(org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy.Context) FileResult(org.apache.beam.sdk.io.FileBasedSink.FileResult) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) ArrayList(java.util.ArrayList) File(java.io.File)

Example 19 with ResourceId

use of org.apache.beam.sdk.io.fs.ResourceId in project beam by apache.

the class FileBasedSinkTest method testCollidingOutputFilenames.

/**
 * Reject non-distinct output filenames.
 *
 * <p>Three file results are built over three different temporary files but all with shard
 * number 1; {@code buildOutputFilenames} is expected to fail with an
 * {@link IllegalStateException} reporting one distinct name for three files.
 *
 * @throws IOException declared for the benefit of the calls under test
 */
@Test
public void testCollidingOutputFilenames() throws IOException {
    ResourceId root = getBaseOutputDirectory();
    SimpleSink sink = new SimpleSink(root, "file", "-NN", "test");
    SimpleSink.SimpleWriteOperation writeOp = new SimpleSink.SimpleWriteOperation(sink);
    ResourceId temp1 = root.resolve("temp1", StandardResolveOptions.RESOLVE_FILE);
    ResourceId temp2 = root.resolve("temp2", StandardResolveOptions.RESOLVE_FILE);
    ResourceId temp3 = root.resolve("temp3", StandardResolveOptions.RESOLVE_FILE);
    // Several results sharing one shard number must be rejected.
    try {
        Iterable<FileResult> results = Lists.newArrayList(new FileResult(temp1, 1, null, null), new FileResult(temp2, 1, null, null), new FileResult(temp3, 1, null, null));
        writeOp.buildOutputFilenames(results);
        fail("Should have failed.");
    } catch (IllegalStateException exn) {
        assertEquals("Only generated 1 distinct file names for 3 files.", exn.getMessage());
    }
}
Also used : FileResult(org.apache.beam.sdk.io.FileBasedSink.FileResult) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) Test(org.junit.Test)

Example 20 with ResourceId

use of org.apache.beam.sdk.io.fs.ResourceId in project beam by apache.

the class WriteWithShardingFactoryTest method withNoShardingSpecifiedReturnsNewTransform.

/**
 * Checks that the replacement transform the factory returns for a {@code WriteFiles} created
 * via {@code WriteFiles.to(sink)} alone is not equal to the original transform.
 */
@Test
public void withNoShardingSpecifiedReturnsNewTransform() {
    ResourceId baseDir = LocalResources.fromString("/foo", true);
    FilenamePolicy filenamePolicy = DefaultFilenamePolicy.constructUsingStandardParameters(StaticValueProvider.of(baseDir), DefaultFilenamePolicy.DEFAULT_SHARD_TEMPLATE, "");

    // Stub sink: creating a write operation throws, so any attempt to use it fails loudly.
    FileBasedSink<Object> stubSink = new FileBasedSink<Object>(StaticValueProvider.of(baseDir), filenamePolicy) {

        @Override
        public WriteOperation<Object> createWriteOperation() {
            throw new IllegalArgumentException("Should not be used");
        }
    };
    WriteFiles<Object> write = WriteFiles.to(stubSink);

    @SuppressWarnings("unchecked")
    PCollection<Object> input = (PCollection) p.apply(Create.empty(VoidCoder.of()));
    AppliedPTransform<PCollection<Object>, PDone, WriteFiles<Object>> application = AppliedPTransform.of("write", input.expand(), Collections.<TupleTag<?>, PValue>emptyMap(), write, p);

    Object replacement = factory.getReplacementTransform(application).getTransform();
    assertThat(replacement, not(equalTo((Object) write)));
}
Also used : DefaultFilenamePolicy(org.apache.beam.sdk.io.DefaultFilenamePolicy) FilenamePolicy(org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy) PCollection(org.apache.beam.sdk.values.PCollection) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) PDone(org.apache.beam.sdk.values.PDone) WriteFiles(org.apache.beam.sdk.io.WriteFiles) Test(org.junit.Test)

Aggregations

ResourceId (org.apache.beam.sdk.io.fs.ResourceId): 23, Test (org.junit.Test): 12, ArrayList (java.util.ArrayList): 7, File (java.io.File): 4, FileResult (org.apache.beam.sdk.io.FileBasedSink.FileResult): 4, FilenamePolicy (org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy): 4, TableRow (com.google.api.services.bigquery.model.TableRow): 3, TableSchema (com.google.api.services.bigquery.model.TableSchema): 3, ImmutableList (com.google.common.collect.ImmutableList): 3, JobStatus (com.google.api.services.bigquery.model.JobStatus): 2, TableReference (com.google.api.services.bigquery.model.TableReference): 2, DefaultFilenamePolicy (org.apache.beam.sdk.io.DefaultFilenamePolicy): 2, Context (org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy.Context): 2, TextIO (org.apache.beam.sdk.io.TextIO): 2, GoogleJsonResponseException (com.google.api.client.googleapis.json.GoogleJsonResponseException): 1, ErrorProto (com.google.api.services.bigquery.model.ErrorProto): 1, Job (com.google.api.services.bigquery.model.Job): 1, JobConfiguration (com.google.api.services.bigquery.model.JobConfiguration): 1, JobStatistics (com.google.api.services.bigquery.model.JobStatistics): 1, Table (com.google.api.services.bigquery.model.Table): 1