Search in sources :

Example 1 with ResourceId

Use of org.apache.beam.sdk.io.fs.ResourceId in the Apache Beam project.

From the class HadoopResourceIdTest, method testResourceIdTester.

/**
 * Runs the shared {@code ResourceIdTester} battery against a resource built from the path of
 * {@code hdfsClusterBaseUri} prefixed with the {@code hdfs://} scheme.
 */
@Test
public void testResourceIdTester() throws Exception {
    String baseDirectorySpec = "hdfs://" + hdfsClusterBaseUri.getPath();
    // NOTE(review): the boolean argument presumably marks the spec as a directory — confirm
    // against FileSystems.matchNewResource.
    ResourceId hdfsBaseDirectory = FileSystems.matchNewResource(baseDirectorySpec, true);
    ResourceIdTester.runResourceIdBattery(hdfsBaseDirectory);
}
Also used : ResourceId(org.apache.beam.sdk.io.fs.ResourceId) Test(org.junit.Test)

Example 2 with ResourceId

Use of org.apache.beam.sdk.io.fs.ResourceId in the Apache Beam project.

From the class FileBasedSinkTest, method testGenerateOutputFilenames.

/**
 * Checks the destination filenames generated by the sink's filename policy when an extension
 * ({@code ".test"}) is supplied, for shard counts of three, one and zero.
 */
@Test
public void testGenerateOutputFilenames() {
    ResourceId outputDir = getBaseOutputDirectory();
    SimpleSink simpleSink = new SimpleSink(outputDir, "file", ".SSSSS.of.NNNNN", ".test");
    FilenamePolicy filenamePolicy = simpleSink.getFilenamePolicy();

    // Three shards: one resolved file per shard index.
    List<ResourceId> expectedForThree = new ArrayList<>();
    for (String shardName : new String[] { "file.00000.of.00003.test", "file.00001.of.00003.test", "file.00002.of.00003.test" }) {
        expectedForThree.add(outputDir.resolve(shardName, StandardResolveOptions.RESOLVE_FILE));
    }
    List<ResourceId> actualForThree = generateDestinationFilenames(outputDir, filenamePolicy, 3);
    assertEquals(expectedForThree, actualForThree);

    // A single shard.
    List<ResourceId> expectedForOne = Collections.singletonList(outputDir.resolve("file.00000.of.00001.test", StandardResolveOptions.RESOLVE_FILE));
    List<ResourceId> actualForOne = generateDestinationFilenames(outputDir, filenamePolicy, 1);
    assertEquals(expectedForOne, actualForOne);

    // No shards: nothing should be generated.
    List<ResourceId> expectedForNone = new ArrayList<>();
    List<ResourceId> actualForNone = generateDestinationFilenames(outputDir, filenamePolicy, 0);
    assertEquals(expectedForNone, actualForNone);
}
Also used : ResourceId(org.apache.beam.sdk.io.fs.ResourceId) FilenamePolicy(org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy) Test(org.junit.Test)

Example 3 with ResourceId

Use of org.apache.beam.sdk.io.fs.ResourceId in the Apache Beam project.

From the class FileBasedSinkTest, method testRemoveTemporaryFiles.

/**
 * Creates {@code numFiles} temporary files and {@code numFiles} output files on disk, invokes
 * {@code removeTemporaryFiles} on the write operation, and then asserts that every temporary
 * file is gone while every output file still exists.
 *
 * @param numFiles number of temporary/output file pairs to create
 * @param tempDirectory temporary directory handed to the write operation and used to build the
 *     temporary filenames
 */
private void testRemoveTemporaryFiles(int numFiles, ResourceId tempDirectory) throws Exception {
    String prefix = "file";
    SimpleSink simpleSink = new SimpleSink(getBaseOutputDirectory(), prefix, "", "");
    WriteOperation<String> operation = new SimpleSink.SimpleWriteOperation(simpleSink, tempDirectory);

    List<File> createdTempFiles = new ArrayList<>();
    List<File> createdOutputFiles = new ArrayList<>();
    for (int index = 0; index < numFiles; index++) {
        String baseName = prefix + index;

        // Temporary file, named the way the write operation names its temporary files.
        ResourceId tempId = WriteOperation.buildTemporaryFilename(tempDirectory, baseName);
        File tempOnDisk = new File(tempId.toString());
        tempOnDisk.getParentFile().mkdirs();
        assertTrue("not able to create new temp file", tempOnDisk.createNewFile());
        createdTempFiles.add(tempOnDisk);

        // Matching output file under the base output directory.
        ResourceId outputId = getBaseOutputDirectory().resolve(baseName, StandardResolveOptions.RESOLVE_FILE);
        File outputOnDisk = new File(outputId.toString());
        outputOnDisk.getParentFile().mkdirs();
        assertTrue("not able to create new output file", outputOnDisk.createNewFile());
        createdOutputFiles.add(outputOnDisk);
    }

    operation.removeTemporaryFiles(Collections.<ResourceId>emptySet(), true);

    for (int index = 0; index < numFiles; index++) {
        File tempOnDisk = createdTempFiles.get(index);
        assertThat(String.format("temp file %s exists", tempOnDisk), tempOnDisk.exists(), is(false));
        File outputOnDisk = createdOutputFiles.get(index);
        assertThat(String.format("output file %s exists", outputOnDisk), outputOnDisk.exists(), is(true));
    }
}
Also used : ResourceId(org.apache.beam.sdk.io.fs.ResourceId) ArrayList(java.util.ArrayList) File(java.io.File)

Example 4 with ResourceId

Use of org.apache.beam.sdk.io.fs.ResourceId in the Apache Beam project.

From the class FileBasedSinkTest, method testFileBasedWriterWithWritableByteChannelFactory.

/**
 * {@link Writer} writes to the {@link WritableByteChannel} provided by {@link
 * DrunkWritableByteChannelFactory}.
 *
 * <p>The test writes "a" and "b" through a writer and expects the resulting temp file to hold
 * each of "header", "a", "b" and "footer" twice in a row.
 */
@Test
public void testFileBasedWriterWithWritableByteChannelFactory() throws Exception {
    final String testUid = "testId";
    ResourceId outputDir = getBaseOutputDirectory();
    SimpleSink drunkSink = new SimpleSink(outputDir, "file", "-SS-of-NN", "txt", new DrunkWritableByteChannelFactory());
    WriteOperation<String> operation = drunkSink.createWriteOperation();
    final Writer<String> drunkWriter = operation.createWriter();
    final ResourceId expectedTempFile = operation.tempDirectory.get().resolve(testUid, StandardResolveOptions.RESOLVE_FILE);

    drunkWriter.openUnwindowed(testUid, -1);
    drunkWriter.write("a");
    drunkWriter.write("b");
    final FileResult closeResult = drunkWriter.close();

    // Every expected line appears twice, back to back.
    final List<String> expectedLines = new ArrayList<>();
    for (String line : new String[] { "header", "a", "b", "footer" }) {
        expectedLines.add(line);
        expectedLines.add(line);
    }

    assertEquals(expectedTempFile, closeResult.getTempFilename());
    assertFileContains(expectedLines, expectedTempFile);
}
Also used : FileResult(org.apache.beam.sdk.io.FileBasedSink.FileResult) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 5 with ResourceId

Use of org.apache.beam.sdk.io.fs.ResourceId in the Apache Beam project.

From the class FileBasedSinkTest, method generateTemporaryFilesForFinalize.

/**
 * Creates {@code numFiles} empty files on disk under the root of {@code tmpFolder}, named with
 * {@code WriteOperation.buildTemporaryFilename} from the base temp directory and the file index.
 *
 * @param numFiles number of temporary files to create
 * @return the created files, in creation order
 */
private List<File> generateTemporaryFilesForFinalize(int numFiles) throws Exception {
    List<File> created = new ArrayList<>();
    for (int index = 0; index < numFiles; index++) {
        // The file index alone serves as the name passed to buildTemporaryFilename.
        String indexName = String.valueOf(index);
        ResourceId tempId = WriteOperation.buildTemporaryFilename(getBaseTempDirectory(), indexName);
        File onDisk = new File(tmpFolder.getRoot(), tempId.toString());
        onDisk.getParentFile().mkdirs();
        assertTrue(onDisk.createNewFile());
        created.add(onDisk);
    }
    return created;
}
Also used : ResourceId(org.apache.beam.sdk.io.fs.ResourceId) ArrayList(java.util.ArrayList) File(java.io.File)

Aggregations

ResourceId (org.apache.beam.sdk.io.fs.ResourceId): 23, Test (org.junit.Test): 12, ArrayList (java.util.ArrayList): 7, File (java.io.File): 4, FileResult (org.apache.beam.sdk.io.FileBasedSink.FileResult): 4, FilenamePolicy (org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy): 4, TableRow (com.google.api.services.bigquery.model.TableRow): 3, TableSchema (com.google.api.services.bigquery.model.TableSchema): 3, ImmutableList (com.google.common.collect.ImmutableList): 3, JobStatus (com.google.api.services.bigquery.model.JobStatus): 2, TableReference (com.google.api.services.bigquery.model.TableReference): 2, DefaultFilenamePolicy (org.apache.beam.sdk.io.DefaultFilenamePolicy): 2, Context (org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy.Context): 2, TextIO (org.apache.beam.sdk.io.TextIO): 2, GoogleJsonResponseException (com.google.api.client.googleapis.json.GoogleJsonResponseException): 1, ErrorProto (com.google.api.services.bigquery.model.ErrorProto): 1, Job (com.google.api.services.bigquery.model.Job): 1, JobConfiguration (com.google.api.services.bigquery.model.JobConfiguration): 1, JobStatistics (com.google.api.services.bigquery.model.JobStatistics): 1, Table (com.google.api.services.bigquery.model.Table): 1