Example usage of org.apache.beam.sdk.io.fs.ResourceId in the Apache Beam project:
the FileBasedSinkTest method testGenerateOutputFilenamesWithoutExtension.
/** Output filenames are generated correctly when an extension is not supplied. */
@Test
public void testGenerateOutputFilenamesWithoutExtension() {
  ResourceId root = getBaseOutputDirectory();
  // Empty extension string: filenames carry only the prefix and shard template.
  SimpleSink sink = new SimpleSink(root, "file", "-SSSSS-of-NNNNN", "");
  FilenamePolicy policy = sink.getFilenamePolicy();

  // Three shards: each name encodes its shard index and the total shard count.
  assertEquals(
      Arrays.asList(
          root.resolve("file-00000-of-00003", StandardResolveOptions.RESOLVE_FILE),
          root.resolve("file-00001-of-00003", StandardResolveOptions.RESOLVE_FILE),
          root.resolve("file-00002-of-00003", StandardResolveOptions.RESOLVE_FILE)),
      generateDestinationFilenames(root, policy, 3));

  // A single shard.
  assertEquals(
      Collections.singletonList(
          root.resolve("file-00000-of-00001", StandardResolveOptions.RESOLVE_FILE)),
      generateDestinationFilenames(root, policy, 1));

  // Zero shards yield an empty list.
  assertEquals(Collections.emptyList(), generateDestinationFilenames(root, policy, 0));
}
Example usage of org.apache.beam.sdk.io.fs.ResourceId in the Apache Beam project:
the FileBasedSinkTest method testWriter.
/**
 * Writer opens the correct file, writes the header, footer, and elements in the correct
 * order, and returns the correct filename.
 */
@Test
public void testWriter() throws Exception {
  String testUid = "testId";
  // The writer is expected to place its temporary output at <tempDir>/<uid>.
  ResourceId expectedTempFile =
      getBaseTempDirectory().resolve(testUid, StandardResolveOptions.RESOLVE_FILE);
  List<String> values = Arrays.asList("sympathetic vulture", "boresome hummingbird");

  // Expected file contents: header, then the written values in order, then footer.
  List<String> expected = new ArrayList<>();
  expected.add(SimpleSink.SimpleWriter.HEADER);
  expected.addAll(values);
  expected.add(SimpleSink.SimpleWriter.FOOTER);

  SimpleSink.SimpleWriter writer =
      buildWriteOperationWithTempDir(getBaseTempDirectory()).createWriter();
  writer.openUnwindowed(testUid, -1);
  for (String value : values) {
    writer.write(value);
  }
  FileResult result = writer.close();

  assertEquals(expectedTempFile, result.getTempFilename());
  assertFileContains(expected, expectedTempFile);
}
Example usage of org.apache.beam.sdk.io.fs.ResourceId in the Apache Beam project:
the FileBasedSinkTest method runFinalize.
/** Finalize and verify that files are copied and temporary files are optionally removed. */
private void runFinalize(SimpleSink.SimpleWriteOperation writeOp, List<File> temporaryFiles)
    throws Exception {
  // Wrap each temporary file in a FileResult, as a completed bundle would have produced.
  List<FileResult> fileResults = new ArrayList<>();
  for (File tempFile : temporaryFiles) {
    fileResults.add(
        new FileResult(
            LocalResources.fromFile(tempFile, false), WriteFiles.UNKNOWN_SHARDNUM, null, null));
  }

  writeOp.finalize(fileResults);

  // Each temporary file must have been moved to its final destination and removed.
  int numFiles = temporaryFiles.size();
  ResourceId outputDirectory = writeOp.getSink().getBaseOutputDirectoryProvider().get();
  FilenamePolicy policy = writeOp.getSink().getFilenamePolicy();
  for (int shard = 0; shard < numFiles; shard++) {
    ResourceId outputFilename =
        policy.unwindowedFilename(outputDirectory, new Context(shard, numFiles), "");
    assertTrue(new File(outputFilename.toString()).exists());
    assertFalse(temporaryFiles.get(shard).exists());
  }

  // The temporary directory itself should be gone after finalization.
  assertFalse(new File(writeOp.tempDirectory.get().toString()).exists());
  // Test that repeated requests of the temp directory return a stable result.
  assertEquals(writeOp.tempDirectory.get(), writeOp.tempDirectory.get());
}
Example usage of org.apache.beam.sdk.io.fs.ResourceId in the Apache Beam project:
the FileBasedSinkTest method testCollidingOutputFilenames.
/** Reject non-distinct output filenames. */
@Test
public void testCollidingOutputFilenames() throws IOException {
  ResourceId root = getBaseOutputDirectory();
  // The "-NN" shard template has no shard-index placeholder, so every shard
  // produces the same output name (e.g. "file-03.test" for 3 shards).
  SimpleSink sink = new SimpleSink(root, "file", "-NN", "test");
  SimpleSink.SimpleWriteOperation writeOp = new SimpleSink.SimpleWriteOperation(sink);
  ResourceId temp1 = root.resolve("temp1", StandardResolveOptions.RESOLVE_FILE);
  ResourceId temp2 = root.resolve("temp2", StandardResolveOptions.RESOLVE_FILE);
  ResourceId temp3 = root.resolve("temp3", StandardResolveOptions.RESOLVE_FILE);

  // More than one shard mapping to the same filename must be rejected.
  try {
    Iterable<FileResult> results =
        Lists.newArrayList(
            new FileResult(temp1, 1, null, null),
            new FileResult(temp2, 1, null, null),
            new FileResult(temp3, 1, null, null));
    writeOp.buildOutputFilenames(results);
    fail("Should have failed.");
  } catch (IllegalStateException exn) {
    assertEquals("Only generated 1 distinct file names for 3 files.", exn.getMessage());
  }
}
Example usage of org.apache.beam.sdk.io.fs.ResourceId in the Apache Beam project:
the WriteWithShardingFactoryTest method withNoShardingSpecifiedReturnsNewTransform.
/** The replacement factory produces a transform distinct from the unsharded original. */
@Test
public void withNoShardingSpecifiedReturnsNewTransform() {
  ResourceId outputDirectory = LocalResources.fromString("/foo", true);
  FilenamePolicy policy =
      DefaultFilenamePolicy.constructUsingStandardParameters(
          StaticValueProvider.of(outputDirectory),
          DefaultFilenamePolicy.DEFAULT_SHARD_TEMPLATE,
          "");

  // A sink whose write operation must never actually be created by this test.
  FileBasedSink<Object> sink =
      new FileBasedSink<Object>(StaticValueProvider.of(outputDirectory), policy) {
        @Override
        public WriteOperation<Object> createWriteOperation() {
          throw new IllegalArgumentException("Should not be used");
        }
      };
  WriteFiles<Object> original = WriteFiles.to(sink);

  @SuppressWarnings("unchecked")
  PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));
  AppliedPTransform<PCollection<Object>, PDone, WriteFiles<Object>> originalApplication =
      AppliedPTransform.of(
          "write", objs.expand(), Collections.<TupleTag<?>, PValue>emptyMap(), original, p);

  assertThat(
      factory.getReplacementTransform(originalApplication).getTransform(),
      not(equalTo((Object) original)));
}
End of aggregated usage examples.