use of org.apache.flink.core.fs.FileSystem in project flink by apache.
the class HadoopInputFormatBase method getFileStats.
// --------------------------------------------------------------------------------------------
// Helper methods
// --------------------------------------------------------------------------------------------
/**
 * Collects the status of every input file and derives basic statistics from them.
 *
 * <p>Each given path is resolved to its file status. A plain file is added to {@code files}
 * directly; for a directory, its direct non-directory children are added (nested directories
 * are skipped, not descended into). Note that {@code files} is filled even when the cached
 * statistics turn out to be still valid.
 *
 * @param cachedStats previously computed statistics, or {@code null} if there are none
 * @param hadoopFilePaths the input paths to inspect
 * @param files output list that receives the status of every file found
 * @return {@code cachedStats} if nothing was modified after its last-modification time,
 *         otherwise freshly computed statistics
 * @throws IOException if a file system call fails
 */
private FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, org.apache.hadoop.fs.Path[] hadoopFilePaths, ArrayList<FileStatus> files) throws IOException {
    long newestTimestamp = 0L;

    for (int i = 0; i < hadoopFilePaths.length; i++) {
        final Path inputPath = new Path(hadoopFilePaths[i].toUri());
        final FileSystem fileSystem = FileSystem.get(inputPath.toUri());
        final FileStatus inputStatus = fileSystem.getFileStatus(inputPath);
        newestTimestamp = Math.max(newestTimestamp, inputStatus.getModificationTime());

        // a plain file is recorded as is
        if (!inputStatus.isDir()) {
            files.add(inputStatus);
            continue;
        }

        // a directory contributes its direct, non-directory children
        final FileStatus[] children = fileSystem.listStatus(inputPath);
        files.ensureCapacity(files.size() + children.length);
        for (FileStatus child : children) {
            if (child.isDir()) {
                continue;
            }
            files.add(child);
            newestTimestamp = Math.max(child.getModificationTime(), newestTimestamp);
        }
    }

    // reuse the cached statistics when nothing has been modified since they were computed
    final boolean cacheStillValid = cachedStats != null && newestTimestamp <= cachedStats.getLastModificationTime();
    if (cacheStillValid) {
        return cachedStats;
    }

    // sum up the length of all collected files
    long totalLength = 0;
    for (FileStatus status : files) {
        totalLength += status.getLen();
    }

    // a non-positive total is reported as unknown
    final long reportedLength = totalLength > 0 ? totalLength : BaseStatistics.SIZE_UNKNOWN;
    return new FileBaseStatistics(newestTimestamp, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
use of org.apache.flink.core.fs.FileSystem in project flink by apache.
the class JobGraph method uploadRequiredJarFiles.
/**
 * Uploads the previously added user JAR files to the job manager through the job manager's BLOB server.
 *
 * <p>Every path in {@code userJars} is opened and streamed to the BLOB server; the key returned
 * for each upload is appended to {@code userJarBlobKeys}. If no user JARs are registered, the
 * method returns without opening a connection.
 *
 * <p>The BLOB client and each input stream are closed via try-with-resources, so a failure
 * while closing never hides the exception that caused the upload to fail (it is attached as a
 * suppressed exception instead).
 *
 * @param serverAddress
 *        the network address of the BLOB server
 * @param blobClientConfig
 *        the blob client configuration
 * @throws IOException
 *         thrown if an I/O error occurs during the upload
 */
public void uploadRequiredJarFiles(InetSocketAddress serverAddress, Configuration blobClientConfig) throws IOException {
    if (this.userJars.isEmpty()) {
        return;
    }

    try (BlobClient bc = new BlobClient(serverAddress, blobClientConfig)) {
        for (final Path jar : this.userJars) {
            final FileSystem fs = jar.getFileSystem();
            try (FSDataInputStream is = fs.open(jar)) {
                final BlobKey key = bc.put(is);
                this.userJarBlobKeys.add(key);
            }
        }
    }
}
use of org.apache.flink.core.fs.FileSystem in project flink by apache.
the class FsStateBackend method validateAndNormalizeUri.
/**
 * Checks and normalizes the checkpoint data URI. This method first checks the validity of the
 * URI (scheme, path, availability of a matching file system) and then normalizes the URI
 * to a path.
 *
 * <p>If the URI does not include an authority, but the file system configured for the URI has an
 * authority, then the normalized path will include this authority.
 *
 * <p>If the scheme is not one of Flink's supported schemes, or the file system URI cannot be
 * combined with the given path, the URI is returned unchanged as a path (in the latter case
 * after logging a warning), because the file system may simply be inaccessible from the client.
 *
 * @param checkpointDataUri The URI to check and normalize.
 * @return A normalized URI as a Path.
 *
 * @throws IllegalArgumentException Thrown, if the URI misses scheme or path, or if the path
 *                                  is empty or the root directory.
 * @throws IOException Thrown, if no file system can be found for the URI's scheme.
 */
public static Path validateAndNormalizeUri(URI checkpointDataUri) throws IOException {
    final String scheme = checkpointDataUri.getScheme();
    final String path = checkpointDataUri.getPath();

    // some validity checks
    if (scheme == null) {
        throw new IllegalArgumentException("The scheme (hdfs://, file://, etc) is null. " + "Please specify the file system scheme explicitly in the URI.");
    }
    if (path == null) {
        throw new IllegalArgumentException("The path to store the checkpoint data in is null. " + "Please specify a directory path for the checkpoint data.");
    }
    if (path.length() == 0 || path.equals("/")) {
        throw new IllegalArgumentException("Cannot use the root directory for checkpoints.");
    }

    if (!FileSystem.isFlinkSupportedScheme(scheme)) {
        // this is because the required filesystem classes may not be available to the flink client
        return new Path(checkpointDataUri);
    }

    // we do a bit of work to make sure that the URI for the filesystem refers to exactly the same
    // (distributed) filesystem on all hosts and includes full host/port information, even if the
    // original URI did not include that. We count on the filesystem loading from the configuration
    // to fill in the missing data.

    // try to grab the file system for this path/URI
    final FileSystem filesystem = FileSystem.get(checkpointDataUri);
    if (filesystem == null) {
        String reason = "Could not find a file system for the given scheme in " + "the available configurations.";
        LOG.warn("Could not verify checkpoint path. This might be caused by a genuine " + "problem or by the fact that the file system is not accessible from the " + "client. Reason: {}", reason);
        return new Path(checkpointDataUri);
    }

    final URI fsURI = filesystem.getUri();
    try {
        URI baseURI = new URI(fsURI.getScheme(), fsURI.getAuthority(), path, null, null);
        return new Path(baseURI);
    } catch (URISyntaxException e) {
        // the exception text is passed as an argument, never as part of the format pattern:
        // it echoes the URI input, and a '%' in there would break String.format
        String reason = String.format("Cannot create file system URI for checkpointDataUri %s and filesystem URI %s: %s", checkpointDataUri, fsURI, e);
        LOG.warn("Could not verify checkpoint path. This might be caused by a genuine " + "problem or by the fact that the file system is not accessible from the " + "client. Reason: {}", reason);
        return new Path(checkpointDataUri);
    }
}
use of org.apache.flink.core.fs.FileSystem in project flink by apache.
the class FsCheckpointStateOutputStreamTest method testCleanupWhenClosingStream.
/**
 * Verifies that closing the checkpoint output stream deletes the file that was created
 * for it on the (mocked) file system.
 */
@Test
public void testCleanupWhenClosingStream() throws IOException {
    // mocked file system that hands out a mocked output stream and records the created path
    final ArgumentCaptor<Path> createdPath = ArgumentCaptor.forClass(Path.class);
    final FSDataOutputStream mockedOutput = mock(FSDataOutputStream.class);
    final FileSystem mockedFs = mock(FileSystem.class);
    when(mockedFs.create(createdPath.capture(), anyBoolean())).thenReturn(mockedOutput);

    final CheckpointStreamFactory.CheckpointStateOutputStream stateStream =
        new FsCheckpointStreamFactory.FsCheckpointStateOutputStream(TEMP_DIR_PATH, mockedFs, 4, 0);

    // this write should create the underlying file stream
    // (presumably because five bytes exceed the 4 passed to the constructor - TODO confirm)
    final byte[] payload = { 1, 2, 3, 4, 5 };
    stateStream.write(payload);
    verify(mockedFs).create(any(Path.class), anyBoolean());

    // closing must delete exactly the path that was created above
    stateStream.close();
    verify(mockedFs).delete(eq(createdPath.getValue()), anyBoolean());
}
use of org.apache.flink.core.fs.FileSystem in project flink by apache.
the class FileMonitoringFunction method run.
/**
 * Polls the monitored path for new files for as long as {@code isRunning} is set and emits one
 * {@code (filePath, startOffset, endOffset)} tuple per file returned by {@code listNewFiles}.
 *
 * <p>For {@code ONLY_NEW_FILES} and {@code REPROCESS_WITH_APPENDED} the tuple is
 * {@code (filePath, 0, -1)}. For {@code PROCESS_ONLY_APPENDED} the tuple spans from the offset
 * remembered for that file (0 if none) to the file's current length, which then becomes the
 * remembered offset. The thread sleeps for {@code interval} between polls.
 *
 * @param ctx the context used to emit the tuples
 * @throws Exception if the path is not a valid URI, a file system call fails, or the sleep
 *                   is interrupted
 */
@Override
public void run(SourceContext<Tuple3<String, Long, Long>> ctx) throws Exception {
    final FileSystem monitoredFs = FileSystem.get(new URI(path));

    while (isRunning) {
        for (String newFile : listNewFiles(monitoredFs)) {
            final boolean emitWholeFile = watchType == WatchType.ONLY_NEW_FILES || watchType == WatchType.REPROCESS_WITH_APPENDED;
            if (emitWholeFile) {
                ctx.collect(new Tuple3<String, Long, Long>(newFile, 0L, -1L));
                offsetOfFiles.put(newFile, -1L);
                continue;
            }

            if (watchType != WatchType.PROCESS_ONLY_APPENDED) {
                continue;
            }

            // only the part appended since the last remembered offset is emitted
            final long currentSize = monitoredFs.getFileStatus(new Path(newFile)).getLen();
            final long startOffset = offsetOfFiles.containsKey(newFile) ? offsetOfFiles.get(newFile) : 0L;
            ctx.collect(new Tuple3<String, Long, Long>(newFile, startOffset, currentSize));
            offsetOfFiles.put(newFile, currentSize);
            LOG.info("File processed: {}, {}, {}", newFile, startOffset, currentSize);
        }

        Thread.sleep(interval);
    }
}
Aggregations