Search in sources :

Example 61 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class BinaryInputFormat method getStatistics.

/**
 * Obtains statistics for the configured input path, reusing the given cached statistics
 * when they are file-based.
 *
 * <p>The basic file statistics are gathered by {@code getFileStats}. If that result already
 * is a {@code SequentialStatistics} instance it is returned directly; otherwise the result and
 * the collected file list are handed to {@code createStatistics}.
 *
 * @param cachedStats previously computed statistics; may be {@code null} or of another type,
 *        in which case nothing is reused
 * @return the statistics, or {@code null} if {@code getFileStats} yields none or if any error
 *         occurs (errors are logged, not propagated)
 */
@Override
public SequentialStatistics getStatistics(BaseStatistics cachedStats) {
    // instanceof is false for null, so no separate null check is required
    FileBaseStatistics reusableStats = null;
    if (cachedStats instanceof FileBaseStatistics) {
        reusableStats = (FileBaseStatistics) cachedStats;
    }

    try {
        final Path inputPath = this.filePath;
        final FileSystem fileSystem = FileSystem.get(inputPath.toUri());
        final ArrayList<FileStatus> collectedFiles = new ArrayList<FileStatus>(1);

        // the file input format handles the up-to-date check and the basic size
        final FileBaseStatistics baseStats = getFileStats(reusableStats, inputPath, fileSystem, collectedFiles);
        if (baseStats == null) {
            return null;
        }

        // stats that are already sequential stats are still valid and can be handed out as-is
        return (baseStats instanceof SequentialStatistics)
                ? (SequentialStatistics) baseStats
                : createStatistics(collectedFiles, baseStats);
    } catch (IOException ioex) {
        if (LOG.isWarnEnabled()) {
            final String message = String.format("Could not determine complete statistics for file '%s' due to an I/O error", this.filePath);
            LOG.warn(message, ioex);
        }
    } catch (Throwable t) {
        if (LOG.isErrorEnabled()) {
            final String message = String.format("Unexpected problem while getting the file statistics for file '%s'", this.filePath);
            LOG.error(message, t);
        }
    }

    // reached only after a logged failure: no stats available
    return null;
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus) FileSystem(org.apache.flink.core.fs.FileSystem) ArrayList(java.util.ArrayList) IOException(java.io.IOException)

Example 62 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class JobGraph method uploadRequiredJarFiles.

/**
	 * Uploads the previously added user jar files to the job manager through the job manager's BLOB server.
	 *
	 * <p>Every path in {@code userJars} is opened through its file system and streamed to the BLOB
	 * server; the key returned for each upload is appended to {@code userJarBlobKeys}. If no user
	 * jars are registered the method returns without contacting the server.
	 *
	 * <p>If an upload fails part-way, keys of jars uploaded before the failure remain in
	 * {@code userJarBlobKeys}.
	 *
	 * @param serverAddress
	 *        the network address of the BLOB server
	 * @param blobClientConfig
	 *        the blob client configuration
	 * @throws IOException
	 *         thrown if an I/O error occurs during the upload
	 */
public void uploadRequiredJarFiles(InetSocketAddress serverAddress, Configuration blobClientConfig) throws IOException {
    if (this.userJars.isEmpty()) {
        return;
    }
    // try-with-resources closes the stream and the client in reverse order and, unlike a manual
    // finally block, keeps the original upload failure as the primary exception: a failure in
    // close() is attached as a suppressed exception instead of replacing it.
    try (BlobClient bc = new BlobClient(serverAddress, blobClientConfig)) {
        for (final Path jar : this.userJars) {
            final FileSystem fs = jar.getFileSystem();
            try (FSDataInputStream is = fs.open(jar)) {
                final BlobKey key = bc.put(is);
                this.userJarBlobKeys.add(key);
            }
        }
    }
}
Also used : Path(org.apache.flink.core.fs.Path) BlobKey(org.apache.flink.runtime.blob.BlobKey) BlobClient(org.apache.flink.runtime.blob.BlobClient) FileSystem(org.apache.flink.core.fs.FileSystem) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream)

Example 63 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class FsStateBackend method validateAndNormalizeUri.

/**
	 * Checks and normalizes the checkpoint data URI. This method first checks the validity of the
	 * URI (scheme, path, availability of a matching file system) and then normalizes the URI
	 * to a path.
	 * 
	 * <p>If the URI does not include an authority, but the file system configured for the URI has an
	 * authority, then the normalized path will include this authority.
	 * 
	 * <p>If the scheme is not one that Flink supports directly, if no file system is returned for
	 * the URI, or if the normalized URI cannot be constructed, the URI is wrapped in a path
	 * unchanged (the latter two cases are logged as warnings).
	 * 
	 * @param checkpointDataUri The URI to check and normalize.
	 * @return A normalized URI as a Path.
	 * 
	 * @throws IllegalArgumentException Thrown, if the URI misses scheme or path, or if the path
	 *                                  is empty or the root directory.
	 * @throws IOException Thrown, if no file system can be found for the URI's scheme.
	 */
public static Path validateAndNormalizeUri(URI checkpointDataUri) throws IOException {
    final String scheme = checkpointDataUri.getScheme();
    final String path = checkpointDataUri.getPath();
    // some validity checks
    if (scheme == null) {
        throw new IllegalArgumentException("The scheme (hdfs://, file://, etc) is null. " + "Please specify the file system scheme explicitly in the URI.");
    }
    if (path == null) {
        throw new IllegalArgumentException("The path to store the checkpoint data in is null. " + "Please specify a directory path for the checkpoint data.");
    }
    if (path.length() == 0 || path.equals("/")) {
        throw new IllegalArgumentException("Cannot use the root directory for checkpoints.");
    }
    if (!FileSystem.isFlinkSupportedScheme(scheme)) {
        // skip verification because the required filesystem classes may not be available to the flink client
        return new Path(checkpointDataUri);
    } else {
        // we do a bit of work to make sure that the URI for the filesystem refers to exactly the same
        // (distributed) filesystem on all hosts and includes full host/port information, even if the
        // original URI did not include that. We count on the filesystem loading from the configuration
        // to fill in the missing data.
        // try to grab the file system for this path/URI
        FileSystem filesystem = FileSystem.get(checkpointDataUri);
        if (filesystem == null) {
            String reason = "Could not find a file system for the given scheme in the available configurations.";
            LOG.warn("Could not verify checkpoint path. This might be caused by a genuine " + "problem or by the fact that the file system is not accessible from the " + "client. Reason: {}", reason);
            return new Path(checkpointDataUri);
        }
        URI fsURI = filesystem.getUri();
        try {
            // combine scheme and authority of the resolved file system with the path of the given URI
            URI baseURI = new URI(fsURI.getScheme(), fsURI.getAuthority(), path, null, null);
            return new Path(baseURI);
        } catch (URISyntaxException e) {
            // the exception is passed as a format argument, never as part of the format string:
            // its text may contain '%' characters that String.format would misread as specifiers
            String reason = String.format("Cannot create file system URI for checkpointDataUri %s and filesystem URI %s: %s", checkpointDataUri, fsURI, e);
            LOG.warn("Could not verify checkpoint path. This might be caused by a genuine " + "problem or by the fact that the file system is not accessible from the " + "client. Reason: {}", reason);
            return new Path(checkpointDataUri);
        }
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileSystem(org.apache.flink.core.fs.FileSystem) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI)

Example 64 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class YarnPreConfiguredMasterHaServicesTest method testCloseAndCleanup.

/**
 * Checks the close-and-cleanup behavior of the services: after the first call the recovery
 * data directory must no longer be found and the services must report closed; a second call
 * must be rejected with an {@link IllegalStateException}.
 */
@Test
public void testCloseAndCleanup() throws Exception {
    final Configuration config = new Configuration();
    config.setString(YarnConfigOptions.APP_MASTER_RPC_ADDRESS, "localhost");
    config.setInteger(YarnConfigOptions.APP_MASTER_RPC_PORT, 1427);

    // instantiate the services and immediately clean them up
    final YarnHighAvailabilityServices haServices = new YarnPreConfiguredMasterNonHaServices(config, hadoopConfig);
    haServices.closeAndCleanupAllData();

    final FileSystem fs = HDFS_ROOT_PATH.getFileSystem();
    final Path workingDir = new Path(HDFS_CLUSTER.getFileSystem().getWorkingDirectory().toString());
    final Path recoveryDir = new Path(workingDir, YarnHighAvailabilityServices.FLINK_RECOVERY_DATA_DIR);

    // the status lookup is expected to fail, because the directory should have been cleaned up
    boolean recoveryDirFound;
    try {
        fs.getFileStatus(recoveryDir);
        recoveryDirFound = true;
    } catch (FileNotFoundException expected) {
        recoveryDirFound = false;
    }
    if (recoveryDirFound) {
        fail("Flink recovery data directory still exists");
    }

    assertTrue(haServices.isClosed());

    // a further cleanup on already closed services has to be rejected
    boolean secondCleanupRejected = false;
    try {
        haServices.closeAndCleanupAllData();
    } catch (IllegalStateException expected) {
        secondCleanupRejected = true;
    }
    if (!secondCleanupRejected) {
        fail("should fail with an IllegalStateException");
    }
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.flink.configuration.Configuration) FileSystem(org.apache.flink.core.fs.FileSystem) FileNotFoundException(java.io.FileNotFoundException) Test(org.junit.Test)

Example 65 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class HadoopInputFormatBase method getFileStats.

// --------------------------------------------------------------------------------------------
// Helper methods
// --------------------------------------------------------------------------------------------
/**
 * Collects the files behind the given Hadoop paths and derives basic file statistics.
 *
 * <p>Each path is resolved to its file status. A plain file is added to {@code files}; for a
 * directory, its direct non-directory children are added. The newest modification time over
 * the paths and the collected files is tracked. If cached statistics exist and are at least as
 * recent as that time, they are returned unchanged. Note that {@code files} is filled in either
 * case.
 *
 * @param cachedStats previously computed statistics, or {@code null} if there are none
 * @param hadoopFilePaths the input paths to inspect
 * @param files output list that receives the status of every collected file
 * @return the cached statistics if still valid, otherwise new statistics carrying the newest
 *         modification time and the summed file length ({@code BaseStatistics.SIZE_UNKNOWN}
 *         when the sum is not positive)
 * @throws IOException if accessing the file system or a file status fails
 */
private FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, org.apache.hadoop.fs.Path[] hadoopFilePaths, ArrayList<FileStatus> files) throws IOException {
    long newestModification = 0L;

    // gather the file statuses and track the most recent modification time stamp
    for (org.apache.hadoop.fs.Path hadoopPath : hadoopFilePaths) {
        final Path flinkPath = new Path(hadoopPath.toUri());
        final FileSystem fileSystem = FileSystem.get(flinkPath.toUri());
        final FileStatus status = fileSystem.getFileStatus(flinkPath);
        newestModification = Math.max(newestModification, status.getModificationTime());

        if (!status.isDir()) {
            // a plain file is taken as-is
            files.add(status);
            continue;
        }

        // a directory contributes its direct children that are not directories themselves
        final FileStatus[] children = fileSystem.listStatus(flinkPath);
        files.ensureCapacity(files.size() + children.length);
        for (FileStatus child : children) {
            if (child.isDir()) {
                continue;
            }
            files.add(child);
            newestModification = Math.max(child.getModificationTime(), newestModification);
        }
    }

    // cached statistics remain usable as long as nothing was modified after they were taken
    final boolean cacheStillValid = cachedStats != null && newestModification <= cachedStats.getLastModificationTime();
    if (cacheStillValid) {
        return cachedStats;
    }

    // sum up the length over all collected files
    long totalLength = 0;
    for (FileStatus status : files) {
        totalLength += status.getLen();
    }

    // a non-positive total is reported as unknown size
    final long reportedLength = totalLength > 0 ? totalLength : BaseStatistics.SIZE_UNKNOWN;
    return new FileBaseStatistics(newestModification, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus) FileBaseStatistics(org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics) FileSystem(org.apache.flink.core.fs.FileSystem)

Aggregations

FileSystem (org.apache.flink.core.fs.FileSystem)102 Path (org.apache.flink.core.fs.Path)80 Test (org.junit.Test)49 IOException (java.io.IOException)28 File (java.io.File)24 FileStatus (org.apache.flink.core.fs.FileStatus)20 FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream)18 FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream)14 URI (java.net.URI)13 LocalFileSystem (org.apache.flink.core.fs.local.LocalFileSystem)13 ArrayList (java.util.ArrayList)10 Random (java.util.Random)8 Configuration (org.apache.flink.configuration.Configuration)8 JobID (org.apache.flink.api.common.JobID)7 FileNotFoundException (java.io.FileNotFoundException)5 StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)5 InputStream (java.io.InputStream)4 URISyntaxException (java.net.URISyntaxException)4 FileBaseStatistics (org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics)4 FsCheckpointStateOutputStream (org.apache.flink.runtime.state.filesystem.FsCheckpointStreamFactory.FsCheckpointStateOutputStream)4