Use of org.apache.flink.core.fs.FileSystem in project flink by apache.
The class BinaryInputFormat, method getStatistics.
@Override
public SequentialStatistics getStatistics(BaseStatistics cachedStats) {
    final FileBaseStatistics cachedFileStats =
            (cachedStats != null && cachedStats instanceof FileBaseStatistics)
                    ? (FileBaseStatistics) cachedStats : null;
    try {
        final Path filePath = this.filePath;
        // get the file system
        final FileSystem fs = FileSystem.get(filePath.toUri());
        final ArrayList<FileStatus> allFiles = new ArrayList<FileStatus>(1);
        // let the file input format deal with the up-to-date check and the basic size
        final FileBaseStatistics stats = getFileStats(cachedFileStats, filePath, fs, allFiles);
        if (stats == null) {
            return null;
        }
        // check whether the file stats are still sequential stats (in that case they are still valid)
        if (stats instanceof SequentialStatistics) {
            return (SequentialStatistics) stats;
        }
        return createStatistics(allFiles, stats);
    } catch (IOException ioex) {
        if (LOG.isWarnEnabled()) {
            LOG.warn(String.format(
                    "Could not determine complete statistics for file '%s' due to an I/O error",
                    this.filePath), ioex);
        }
    } catch (Throwable t) {
        if (LOG.isErrorEnabled()) {
            LOG.error(String.format(
                    "Unexpected problem while getting the file statistics for file '%s'",
                    this.filePath), t);
        }
    }
    // no stats available
    return null;
}
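The pattern to note: the FileSystem is looked up from the path's URI via FileSystem.get(URI), so the same code handles local files, HDFS, and any other registered scheme. A minimal, self-contained sketch of that lookup (the file path here is a made-up example, not from the Flink sources):

import java.io.IOException;

import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class FileStatsProbe {

    public static void main(String[] args) throws IOException {
        // hypothetical input file; any URI with a supported scheme works the same way
        final Path path = new Path("file:///tmp/some-input-file");

        // resolve the file system responsible for this URI's scheme
        final FileSystem fs = FileSystem.get(path.toUri());

        // the same basic per-file statistics the input format above starts from
        final FileStatus status = fs.getFileStatus(path);
        System.out.println("size = " + status.getLen()
                + ", last modified = " + status.getModificationTime());
    }
}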
Use of org.apache.flink.core.fs.FileSystem in project flink by apache.
The class JobGraph, method uploadRequiredJarFiles.
/**
 * Uploads the previously added user JAR files to the job manager through
 * the job manager's BLOB server.
 *
 * @param serverAddress
 *        the network address of the BLOB server
 * @param blobClientConfig
 *        the BLOB client configuration
 * @throws IOException
 *         thrown if an I/O error occurs during the upload
 */
public void uploadRequiredJarFiles(InetSocketAddress serverAddress, Configuration blobClientConfig) throws IOException {
    if (this.userJars.isEmpty()) {
        return;
    }
    BlobClient bc = null;
    try {
        bc = new BlobClient(serverAddress, blobClientConfig);
        for (final Path jar : this.userJars) {
            final FileSystem fs = jar.getFileSystem();
            FSDataInputStream is = null;
            try {
                is = fs.open(jar);
                final BlobKey key = bc.put(is);
                this.userJarBlobKeys.add(key);
            } finally {
                if (is != null) {
                    is.close();
                }
            }
        }
    } finally {
        if (bc != null) {
            bc.close();
        }
    }
}
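Both BlobClient and FSDataInputStream implement Closeable, so on Java 7+ the manual finally blocks above can be collapsed into try-with-resources. A sketch of the same loop under that assumption (names reused from the method above, behavior intended to be equivalent):

try (BlobClient bc = new BlobClient(serverAddress, blobClientConfig)) {
    for (final Path jar : this.userJars) {
        final FileSystem fs = jar.getFileSystem();
        // the stream is closed automatically, even if put() fails
        try (FSDataInputStream is = fs.open(jar)) {
            this.userJarBlobKeys.add(bc.put(is));
        }
    }
}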
Use of org.apache.flink.core.fs.FileSystem in project flink by apache.
The class FsStateBackend, method validateAndNormalizeUri.
/**
 * Checks and normalizes the checkpoint data URI. This method first checks the validity of the
 * URI (scheme, path, availability of a matching file system) and then normalizes the URI
 * to a path.
 *
 * <p>If the URI does not include an authority, but the file system configured for the URI has an
 * authority, then the normalized path will include this authority.
 *
 * @param checkpointDataUri The URI to check and normalize.
 * @return A normalized URI as a Path.
 *
 * @throws IllegalArgumentException Thrown if the URI is missing its scheme or path.
 * @throws IOException Thrown if no file system can be found for the URI's scheme.
 */
public static Path validateAndNormalizeUri(URI checkpointDataUri) throws IOException {
    final String scheme = checkpointDataUri.getScheme();
    final String path = checkpointDataUri.getPath();
    // some validity checks
    if (scheme == null) {
        throw new IllegalArgumentException("The scheme (hdfs://, file://, etc) is null. " +
                "Please specify the file system scheme explicitly in the URI.");
    }
    if (path == null) {
        throw new IllegalArgumentException("The path to store the checkpoint data in is null. " +
                "Please specify a directory path for the checkpoint data.");
    }
    if (path.length() == 0 || path.equals("/")) {
        throw new IllegalArgumentException("Cannot use the root directory for checkpoints.");
    }
    if (!FileSystem.isFlinkSupportedScheme(checkpointDataUri.getScheme())) {
        // skip the verification: the required file system classes may not be
        // available to the flink client
        return new Path(checkpointDataUri);
    } else {
        // we do a bit of work to make sure that the URI for the filesystem refers to exactly the same
        // (distributed) filesystem on all hosts and includes full host/port information, even if the
        // original URI did not include that. We count on the filesystem loading from the configuration
        // to fill in the missing data.
        // try to grab the file system for this path/URI
        FileSystem filesystem = FileSystem.get(checkpointDataUri);
        if (filesystem == null) {
            String reason = "Could not find a file system for the given scheme in " +
                    "the available configurations.";
            LOG.warn("Could not verify checkpoint path. This might be caused by a genuine " +
                    "problem or by the fact that the file system is not accessible from the " +
                    "client. Reason: {}", reason);
            return new Path(checkpointDataUri);
        }
        URI fsURI = filesystem.getUri();
        try {
            URI baseURI = new URI(fsURI.getScheme(), fsURI.getAuthority(), path, null, null);
            return new Path(baseURI);
        } catch (URISyntaxException e) {
            String reason = String.format(
                    "Cannot create file system URI for checkpointDataUri %s and filesystem URI %s: %s",
                    checkpointDataUri, fsURI, e.toString());
            LOG.warn("Could not verify checkpoint path. This might be caused by a genuine " +
                    "problem or by the fact that the file system is not accessible from the " +
                    "client. Reason: {}", reason);
            return new Path(checkpointDataUri);
        }
    }
}
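A hedged usage sketch of this method: a well-formed URI comes back as a normalized Path, possibly with the authority filled in from the configured file system, while a URI without a scheme fails fast. The HDFS host and port below are invented for illustration:

import java.io.IOException;
import java.net.URI;

import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;

public class CheckpointUriCheck {

    public static void main(String[] args) throws IOException {
        // hypothetical checkpoint location; host and port are made up
        Path normalized = FsStateBackend.validateAndNormalizeUri(
                URI.create("hdfs://namenode:50010/flink/checkpoints"));
        System.out.println(normalized);

        // a URI without a scheme fails fast:
        // FsStateBackend.validateAndNormalizeUri(URI.create("/flink/checkpoints"))
        //   -> IllegalArgumentException: "The scheme (hdfs://, file://, etc) is null. ..."
    }
}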
Use of org.apache.flink.core.fs.FileSystem in project flink by apache.
The class YarnPreConfiguredMasterHaServicesTest, method testCloseAndCleanup.
@Test
public void testCloseAndCleanup() throws Exception {
    final Configuration flinkConfig = new Configuration();
    flinkConfig.setString(YarnConfigOptions.APP_MASTER_RPC_ADDRESS, "localhost");
    flinkConfig.setInteger(YarnConfigOptions.APP_MASTER_RPC_PORT, 1427);
    // create the services
    YarnHighAvailabilityServices services = new YarnPreConfiguredMasterNonHaServices(flinkConfig, hadoopConfig);
    services.closeAndCleanupAllData();
    final FileSystem fileSystem = HDFS_ROOT_PATH.getFileSystem();
    final Path workDir = new Path(HDFS_CLUSTER.getFileSystem().getWorkingDirectory().toString());
    try {
        fileSystem.getFileStatus(new Path(workDir, YarnHighAvailabilityServices.FLINK_RECOVERY_DATA_DIR));
        fail("Flink recovery data directory still exists");
    } catch (FileNotFoundException e) {
        // expected, because the directory should have been cleaned up
    }
    assertTrue(services.isClosed());
    // doing another cleanup when the services are closed should fail
    try {
        services.closeAndCleanupAllData();
        fail("should fail with an IllegalStateException");
    } catch (IllegalStateException e) {
        // expected
    }
}
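The test detects the deleted directory by catching the FileNotFoundException that getFileStatus throws. Flink's FileSystem also exposes exists(Path), which would express the same assertion without exception-based control flow. A sketch under that assumption, reusing the fileSystem and workDir locals from the test above (assertFalse from org.junit.Assert):

// same check via FileSystem#exists, which wraps the
// getFileStatus/FileNotFoundException dance internally
Path recoveryDir = new Path(workDir, YarnHighAvailabilityServices.FLINK_RECOVERY_DATA_DIR);
assertFalse("Flink recovery data directory still exists", fileSystem.exists(recoveryDir));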
Use of org.apache.flink.core.fs.FileSystem in project flink by apache.
The class HadoopInputFormatBase, method getFileStats.
// --------------------------------------------------------------------------------------------
//  Helper methods
// --------------------------------------------------------------------------------------------

private FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, org.apache.hadoop.fs.Path[] hadoopFilePaths, ArrayList<FileStatus> files) throws IOException {
    long latestModTime = 0L;
    // get the file info and check whether the cached statistics are still valid.
    for (org.apache.hadoop.fs.Path hadoopPath : hadoopFilePaths) {
        final Path filePath = new Path(hadoopPath.toUri());
        final FileSystem fs = FileSystem.get(filePath.toUri());
        final FileStatus file = fs.getFileStatus(filePath);
        latestModTime = Math.max(latestModTime, file.getModificationTime());
        // enumerate all files and check their modification time stamp.
        if (file.isDir()) {
            FileStatus[] fss = fs.listStatus(filePath);
            files.ensureCapacity(files.size() + fss.length);
            for (FileStatus s : fss) {
                if (!s.isDir()) {
                    files.add(s);
                    latestModTime = Math.max(s.getModificationTime(), latestModTime);
                }
            }
        } else {
            files.add(file);
        }
    }
    // check whether the cached statistics are still valid, if we have any
    if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) {
        return cachedStats;
    }
    // calculate the total length
    long len = 0;
    for (FileStatus s : files) {
        len += s.getLen();
    }
    // sanity check
    if (len <= 0) {
        len = BaseStatistics.SIZE_UNKNOWN;
    }
    return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}