use of org.apache.beam.sdk.io.fs.MatchResult.Metadata in project beam by apache.
the class FileBasedSourceTest method testToStringFile.
/**
 * Checks the string form of a source built over a single file: the file's absolute path followed
 * by the offset range the source was constructed with.
 */
@Test
public void testToStringFile() throws Exception {
  // The file is created from an empty list of lines; only toString() is inspected below.
  File emptyFile = createFileWithData("foo", Collections.<String>emptyList());
  Metadata fileMetadata = FileSystems.matchSingleFileSpec(emptyFile.getPath());
  TestFileBasedSource source = new TestFileBasedSource(fileMetadata, 1, 0, 10, null);

  String expected = String.format("%s range [0, 10)", emptyFile.getAbsolutePath());
  assertEquals(expected, source.toString());
}
use of org.apache.beam.sdk.io.fs.MatchResult.Metadata in project beam by apache.
the class FileBasedSourceTest method testSplitAtFraction.
/**
 * Runs a fixed series of split-at-fraction checks against a source that spans one whole file,
 * covering both splits that are expected to fail and splits that are expected to succeed.
 */
@Test
public void testSplitAtFraction() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  File dataFile = createFileWithData("file", createStringDataset(3, 100));
  Metadata fileMetadata = FileSystems.matchSingleFileSpec(dataFile.getPath());
  // The source covers the file from offset 0 up to its full length.
  long fileLength = dataFile.length();
  TestFileBasedSource source = new TestFileBasedSource(fileMetadata, 1, 0, fileLength, null);

  // Shouldn't be able to split while unstarted.
  assertSplitAtFractionFails(source, 0, 0.7, pipelineOptions);

  assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.7, pipelineOptions);
  assertSplitAtFractionSucceedsAndConsistent(source, 30, 0.7, pipelineOptions);

  assertSplitAtFractionFails(source, 0, 0.0, pipelineOptions);
  assertSplitAtFractionFails(source, 70, 0.3, pipelineOptions);
  assertSplitAtFractionFails(source, 100, 1.0, pipelineOptions);
  assertSplitAtFractionFails(source, 100, 0.99, pipelineOptions);

  assertSplitAtFractionSucceedsAndConsistent(source, 100, 0.995, pipelineOptions);
}
use of org.apache.beam.sdk.io.fs.MatchResult.Metadata in project beam by apache.
the class FileBasedSourceTest method testSplitAtFractionExhaustive.
/**
 * Hands a source spanning one whole file to {@code assertSplitAtFractionExhaustive}.
 */
@Test
public void testSplitAtFractionExhaustive() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  // Smaller file for exhaustive testing.
  File smallFile = createFileWithData("file", createStringDataset(3, 20));
  Metadata fileMetadata = FileSystems.matchSingleFileSpec(smallFile.getPath());
  long fileLength = smallFile.length();
  TestFileBasedSource source = new TestFileBasedSource(fileMetadata, 1, 0, fileLength, null);

  assertSplitAtFractionExhaustive(source, pipelineOptions);
}
use of org.apache.beam.sdk.io.fs.MatchResult.Metadata in project beam by apache.
the class NumberedShardedFile method readFilesWithRetries.
/**
 * Discovers all shards of this file using the provided {@link Sleeper} and {@link BackOff}.
 *
 * <p>Because of eventual consistency, reads may discover no files or fewer files than
 * the shard template implies. In this case, the read is considered to have failed and another
 * attempt is made for as long as {@code BackOffUtils.next(sleeper, backOff)} returns true.
 *
 * @param sleeper passed to {@code BackOffUtils.next} between attempts
 * @param backOff passed to {@code BackOffUtils.next} to decide whether to try again
 * @return the result of {@code readLines} for the first attempt whose matched files are
 *     non-empty and pass {@code checkTotalNumOfFiles}
 * @throws IOException if no attempt succeeded; its cause is the most recent {@link IOException}
 *     caught during an attempt, or {@code null} when no attempt threw one (for example when
 *     every attempt simply matched too few files)
 * @throws InterruptedException if one is raised while backing off between attempts
 */
@Override
public List<String> readFilesWithRetries(Sleeper sleeper, BackOff backOff)
    throws IOException, InterruptedException {
  IOException lastException = null;
  do {
    try {
      // Match the file pattern, which may contain a glob.
      Collection<Metadata> files =
          Iterables.getOnlyElement(FileSystems.match(Collections.singletonList(filePattern)))
              .metadata();
      LOG.debug("Found {} file(s) by matching the path: {}", files.size(), filePattern);
      if (files.isEmpty() || !checkTotalNumOfFiles(files)) {
        // In a do-while, `continue` jumps to the loop condition, so the backoff is still
        // consulted before the next attempt.
        continue;
      }
      // Read data from file paths
      return readLines(files);
    } catch (IOException e) {
      // Remember the failure for the final exception and retry. The exception is passed to the
      // logger so that intermediate failures remain diagnosable even when a later attempt
      // fails differently or succeeds.
      lastException = e;
      LOG.warn("Error in file reading. Ignore and retry.", e);
    }
  } while (BackOffUtils.next(sleeper, backOff));
  // Failed after max retries
  throw new IOException(
      String.format("Unable to read file(s) after retrying %d times", MAX_READ_RETRIES),
      lastException);
}
Aggregations