Use of org.apache.flink.core.fs.FileSystem in the Apache Flink project.
Class AvroParquetRecordFormatTest, method createReader.
// ------------------------------------------------------------------------
// helper methods
// ------------------------------------------------------------------------
/**
 * Opens {@code filePath} and asks {@code format} to create a reader over it.
 *
 * <p>If the format reports itself as splittable, the stream is positioned at {@code
 * splitOffset}. Otherwise the stream is positioned at 0 and the split must cover the whole
 * file, i.e. {@code splitLength} must equal the file length.
 *
 * <p>The reader is created with the file length and {@code splitOffset + splitLength} as the
 * last two arguments. If positioning, validation, or reader creation fails, the stream opened
 * here is closed before the failure is propagated so that it does not leak.
 *
 * @param format the record format used to create the reader
 * @param config configuration handed to the format
 * @param filePath path of the file to read
 * @param splitOffset offset at which the split starts
 * @param splitLength length of the split
 * @return the reader created by {@code format}
 * @throws IOException if the file cannot be inspected, opened, positioned, or read
 */
private <T> StreamFormat.Reader<T> createReader(AvroParquetRecordFormat<T> format, Configuration config, Path filePath, long splitOffset, long splitLength) throws IOException {
    final FileSystem fileSystem = filePath.getFileSystem();
    final FileStatus fileStatus = fileSystem.getFileStatus(filePath);
    final FSDataInputStream inputStream = fileSystem.open(filePath);
    try {
        if (format.isSplittable()) {
            inputStream.seek(splitOffset);
        } else {
            inputStream.seek(0);
            checkArgument(splitLength == fileStatus.getLen());
        }
        return format.createReader(config, inputStream, fileStatus.getLen(), splitOffset + splitLength);
    } catch (IOException | RuntimeException e) {
        // No reader was handed back, so nobody else will close the stream.
        try {
            inputStream.close();
        } catch (IOException | RuntimeException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
Use of org.apache.flink.core.fs.FileSystem in the Apache Flink project.
Class DistributedCacheDfsTest, method setup.
/**
 * Starts a {@link MiniDFSCluster} whose base directory is a fresh temporary folder and writes
 * the test fixtures into it: {@code testFile} in the home directory, plus a {@code testDir}
 * directory containing {@code testFile1} and {@code testFile2}.
 */
@BeforeClass
public static void setup() throws Exception {
    final File baseDir = TEMP_FOLDER.newFolder();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath());
    hdfsCluster = new MiniDFSCluster.Builder(conf).build();

    // Build the hdfs:// URI of the freshly started name node.
    final String nameNodeAddress =
            NetUtils.hostAndPortToUrlString(
                    hdfsCluster.getURI().getHost(), hdfsCluster.getNameNodePort());
    final FileSystem fileSystem = FileSystem.get(new URI("hdfs://" + nameNodeAddress + "/"));

    testFile = writeFile(fileSystem, fileSystem.getHomeDirectory(), "testFile");

    testDir = new Path(fileSystem.getHomeDirectory(), "testDir");
    fileSystem.mkdirs(testDir);
    for (String fileName : new String[] {"testFile1", "testFile2"}) {
        writeFile(fileSystem, testDir, fileName);
    }
}
Use of org.apache.flink.core.fs.FileSystem in the Apache Flink project.
Class FileMonitoringFunction, method run.
/**
 * Repeatedly lists new files under {@code path} and emits one tuple per file while the source
 * is running, sleeping {@code interval} milliseconds between rounds.
 *
 * <p>For {@code ONLY_NEW_FILES} and {@code REPROCESS_WITH_APPENDED} the tuple is {@code (file,
 * 0, -1)} and {@code -1} is recorded as the file's offset. For {@code PROCESS_ONLY_APPENDED}
 * the tuple is {@code (file, previously recorded offset or 0, current file length)} and the
 * current length is recorded as the new offset.
 */
@Override
public void run(SourceContext<Tuple3<String, Long, Long>> ctx) throws Exception {
    final FileSystem fs = FileSystem.get(new URI(path));

    while (isRunning) {
        final List<String> newFiles = listNewFiles(fs);

        for (String newFile : newFiles) {
            final boolean emitWholeFile =
                    watchType == WatchType.ONLY_NEW_FILES
                            || watchType == WatchType.REPROCESS_WITH_APPENDED;

            if (emitWholeFile) {
                final Tuple3<String, Long, Long> wholeFile =
                        new Tuple3<String, Long, Long>(newFile, 0L, -1L);
                ctx.collect(wholeFile);
                offsetOfFiles.put(newFile, -1L);
                continue;
            }

            if (watchType != WatchType.PROCESS_ONLY_APPENDED) {
                continue;
            }

            // Query the current length first, then look up where the last round stopped.
            final long currentLength = fs.getFileStatus(new Path(newFile)).getLen();
            final long startOffset =
                    offsetOfFiles.containsKey(newFile) ? offsetOfFiles.get(newFile) : 0L;

            final Tuple3<String, Long, Long> appendedRange =
                    new Tuple3<String, Long, Long>(newFile, startOffset, currentLength);
            ctx.collect(appendedRange);
            offsetOfFiles.put(newFile, currentLength);
            LOG.info("File processed: {}, {}, {}", newFile, startOffset, currentLength);
        }

        Thread.sleep(interval);
    }
}
Use of org.apache.flink.core.fs.FileSystem in the Apache Flink project.
Class FsJobArchivist, method archiveJob.
/**
 * Writes the given collection of {@link ArchivedJson} entries as a single JSON document to the
 * {@link FileSystem} of {@code rootPath}.
 *
 * <p>The archive file is named after the job id and is created with {@link
 * FileSystem.WriteMode#NO_OVERWRITE}. If writing fails after the file has been created, the
 * output stream is closed and the partially written file is deleted on a best-effort basis;
 * failures during that cleanup are attached to the original exception as suppressed exceptions
 * instead of replacing it.
 *
 * @param rootPath directory to which the archive should be written
 * @param jobId job id, used as the name of the archive file
 * @param jsonToArchive collection of json-path pairs that should be archived
 * @return path to where the archive was written
 * @throws IOException if the archive file cannot be created or written
 */
public static Path archiveJob(Path rootPath, JobID jobId, Collection<ArchivedJson> jsonToArchive) throws IOException {
    try {
        FileSystem fs = rootPath.getFileSystem();
        Path path = new Path(rootPath, jobId.toString());
        // Created outside the guarded section: if creation itself fails (e.g. the file already
        // exists), nothing written by this call must be deleted.
        final OutputStream out = fs.create(path, FileSystem.WriteMode.NO_OVERWRITE);
        try (JsonGenerator gen = jacksonFactory.createGenerator(out, JsonEncoding.UTF8)) {
            gen.writeStartObject();
            gen.writeArrayFieldStart(ARCHIVE);
            for (ArchivedJson archive : jsonToArchive) {
                gen.writeStartObject();
                gen.writeStringField(PATH, archive.getPath());
                gen.writeStringField(JSON, archive.getJson());
                gen.writeEndObject();
            }
            gen.writeEndArray();
            gen.writeEndObject();
        } catch (Exception e) {
            // If the generator could not be created, nothing has closed the stream yet.
            try {
                out.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            // Best-effort removal of the partial archive; keep the original failure primary.
            try {
                fs.delete(path, false);
            } catch (Exception deleteFailure) {
                e.addSuppressed(deleteFailure);
            }
            throw e;
        }
        LOG.info("Job {} has been archived at {}.", jobId, path);
        return path;
    } catch (IOException e) {
        LOG.error("Failed to archive job.", e);
        throw e;
    }
}
Use of org.apache.flink.core.fs.FileSystem in the Apache Flink project.
Class FileCacheDirectoriesTest, method testDirectoryCleanUp.
/**
 * Verifies that a cached directory shared by two execution attempts survives the release of
 * both attempts and is only removed once the scheduled delete process has run.
 *
 * <p>The directory status is re-read from the file system before every check; asserting on a
 * {@code FileStatus} obtained before the release calls would not observe any change.
 */
@Test
public void testDirectoryCleanUp() throws Exception {
    JobID jobID = new JobID();
    ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
    ExecutionAttemptID attemptID2 = new ExecutionAttemptID();
    final String fileName = "test_file";
    // copy / create the file
    final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry(fileName, false, InstantiationUtil.serializeObject(permanentBlobKey), true);
    Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID1);
    fileCache.createTmpFile(fileName, entry, jobID, attemptID2);
    final Path dstPath = copyResult.get();
    final FileSystem fs = dstPath.getFileSystem();
    final Path cacheFile = new Path(dstPath, "cacheFile");
    assertTrue(fs.getFileStatus(dstPath).isDir());
    assertTrue(fs.exists(cacheFile));
    fileCache.releaseJob(jobID, attemptID1);
    // still should be available
    assertTrue(fs.getFileStatus(dstPath).isDir());
    assertTrue(fs.exists(cacheFile));
    fileCache.releaseJob(jobID, attemptID2);
    // still should be available, file will be deleted after cleanupInterval
    assertTrue(fs.getFileStatus(dstPath).isDir());
    assertTrue(fs.exists(cacheFile));
    // after a while, the file should disappear
    assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis);
    executorService.lastDeleteProcess.run();
    assertFalse(fs.exists(dstPath));
    assertFalse(fs.exists(cacheFile));
}
Aggregations