Search in sources :

Example 36 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class HadoopInputFormatBase method getFileStats.

// --------------------------------------------------------------------------------------------
//  Helper methods
// --------------------------------------------------------------------------------------------
/**
 * Collects the file statuses behind the given Hadoop paths and derives base statistics from them.
 *
 * <p>Each path is resolved individually. A path that is a directory contributes its direct,
 * non-directory children (no recursion); any other path contributes its own status. All
 * collected statuses are appended to {@code files}.
 *
 * @param cachedStats previously computed statistics, or {@code null} if there are none
 * @param hadoopFilePaths the Hadoop paths to inspect
 * @param files list that receives the status of every file found; its existing entries also
 *        count towards the total length
 * @return {@code cachedStats} if nothing was modified after the cached statistics were taken,
 *         otherwise freshly computed statistics
 * @throws IOException if a file system or file status cannot be obtained
 */
private FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, org.apache.hadoop.fs.Path[] hadoopFilePaths, ArrayList<FileStatus> files) throws IOException {
    long newestModification = 0L;

    for (org.apache.hadoop.fs.Path hadoopPath : hadoopFilePaths) {
        final Path flinkPath = new Path(hadoopPath.toUri());
        final FileSystem fileSystem = FileSystem.get(flinkPath.toUri());
        final FileStatus status = fileSystem.getFileStatus(flinkPath);
        newestModification = Math.max(newestModification, status.getModificationTime());

        // a plain file is recorded as-is
        if (!status.isDir()) {
            files.add(status);
            continue;
        }

        // a directory contributes its direct children that are not directories themselves
        final FileStatus[] children = fileSystem.listStatus(flinkPath);
        files.ensureCapacity(files.size() + children.length);
        for (FileStatus child : children) {
            if (child.isDir()) {
                continue;
            }
            files.add(child);
            newestModification = Math.max(child.getModificationTime(), newestModification);
        }
    }

    // the cached statistics can be reused when nothing is newer than them
    final boolean cacheStillValid = cachedStats != null && newestModification <= cachedStats.getLastModificationTime();
    if (cacheStillValid) {
        return cachedStats;
    }

    // sum up the length of everything that was collected
    long totalLength = 0;
    for (FileStatus entry : files) {
        totalLength += entry.getLen();
    }

    // a non-positive total is reported as "unknown"
    final long reportedLength = totalLength > 0 ? totalLength : BaseStatistics.SIZE_UNKNOWN;
    return new FileBaseStatistics(newestModification, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
Also used : Path(org.apache.flink.core.fs.Path) FileStatus(org.apache.flink.core.fs.FileStatus) FileBaseStatistics(org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics) FileSystem(org.apache.flink.core.fs.FileSystem)

Example 37 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class JobGraph method uploadRequiredJarFiles.

/**
 * Uploads the previously added user jar files to the job manager through the job manager's BLOB server.
 *
 * <p>Does nothing if no user jars have been added. Otherwise each jar is opened through its
 * file system, streamed to the BLOB server, and the resulting key is recorded in
 * {@code userJarBlobKeys}.
 *
 * <p>The BLOB client and every jar input stream are managed by try-with-resources, so a
 * failure during the upload is not replaced by a secondary failure while closing; the close
 * failure is attached to the primary exception as a suppressed exception instead.
 *
 * @param serverAddress
 *        the network address of the BLOB server
 * @param blobClientConfig
 *        the blob client configuration
 * @throws IOException
 *         thrown if an I/O error occurs during the upload
 */
public void uploadRequiredJarFiles(InetSocketAddress serverAddress, Configuration blobClientConfig) throws IOException {
    if (this.userJars.isEmpty()) {
        return;
    }
    try (BlobClient bc = new BlobClient(serverAddress, blobClientConfig)) {
        for (final Path jar : this.userJars) {
            final FileSystem fs = jar.getFileSystem();
            try (FSDataInputStream is = fs.open(jar)) {
                final BlobKey key = bc.put(is);
                this.userJarBlobKeys.add(key);
            }
        }
    }
}
Also used : Path(org.apache.flink.core.fs.Path) BlobKey(org.apache.flink.runtime.blob.BlobKey) BlobClient(org.apache.flink.runtime.blob.BlobClient) FileSystem(org.apache.flink.core.fs.FileSystem) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream)

Example 38 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class FsStateBackend method validateAndNormalizeUri.

/**
 * Checks and normalizes the checkpoint data URI. This method first checks the validity of the
 * URI (scheme, path, availability of a matching file system) and then normalizes the URI
 * to a path.
 *
 * <p>If the URI does not include an authority, but the file system configured for the URI has an
 * authority, then the normalized path will include this authority.
 *
 * <p>If the scheme is not a Flink-supported scheme, or the file system cannot be resolved or
 * combined with the path, the URI is returned as a path without normalization (the latter
 * two cases also log a warning).
 *
 * @param checkpointDataUri The URI to check and normalize.
 * @return A normalized URI as a Path.
 *
 * @throws IllegalArgumentException Thrown, if the URI misses scheme or path, or if the path
 *                                  is empty or the root directory.
 * @throws IOException Thrown, if no file system can be found for the URI's scheme.
 */
public static Path validateAndNormalizeUri(URI checkpointDataUri) throws IOException {
    final String scheme = checkpointDataUri.getScheme();
    final String path = checkpointDataUri.getPath();
    // some validity checks
    if (scheme == null) {
        throw new IllegalArgumentException("The scheme (hdfs://, file://, etc) is null. " + "Please specify the file system scheme explicitly in the URI.");
    }
    if (path == null) {
        throw new IllegalArgumentException("The path to store the checkpoint data in is null. " + "Please specify a directory path for the checkpoint data.");
    }
    if (path.length() == 0 || path.equals("/")) {
        throw new IllegalArgumentException("Cannot use the root directory for checkpoints.");
    }
    if (!FileSystem.isFlinkSupportedScheme(scheme)) {
        // this is because the required filesystem classes may not be available to the flink client
        return new Path(checkpointDataUri);
    } else {
        // we do a bit of work to make sure that the URI for the filesystem refers to exactly the same
        // (distributed) filesystem on all hosts and includes full host/port information, even if the
        // original URI did not include that. We count on the filesystem loading from the configuration
        // to fill in the missing data.
        // try to grab the file system for this path/URI
        FileSystem filesystem = FileSystem.get(checkpointDataUri);
        if (filesystem == null) {
            String reason = "Could not find a file system for the given scheme in " + "the available configurations.";
            LOG.warn("Could not verify checkpoint path. This might be caused by a genuine " + "problem or by the fact that the file system is not accessible from the " + "client. Reason: {}", reason);
            return new Path(checkpointDataUri);
        }
        URI fsURI = filesystem.getUri();
        try {
            URI baseURI = new URI(fsURI.getScheme(), fsURI.getAuthority(), path, null, null);
            return new Path(baseURI);
        } catch (URISyntaxException e) {
            // the exception is passed as a format argument, never as part of the format string:
            // its text may contain '%' characters that the formatter would otherwise interpret
            String reason = String.format("Cannot create file system URI for checkpointDataUri %s and filesystem URI %s: %s", checkpointDataUri, fsURI, e);
            LOG.warn("Could not verify checkpoint path. This might be caused by a genuine " + "problem or by the fact that the file system is not accessible from the " + "client. Reason: {}", reason);
            return new Path(checkpointDataUri);
        }
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileSystem(org.apache.flink.core.fs.FileSystem) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI)

Example 39 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class FsCheckpointStateOutputStreamTest method testCleanupWhenClosingStream.

/**
 * Verifies that closing the checkpoint output stream deletes the file it created on the
 * (mocked) file system.
 */
@Test
public void testCleanupWhenClosingStream() throws IOException {
    // mocked file system that hands out a mocked output stream and remembers the requested path
    final FileSystem mockedFs = mock(FileSystem.class);
    final FSDataOutputStream mockedOut = mock(FSDataOutputStream.class);
    final ArgumentCaptor<Path> createdPath = ArgumentCaptor.forClass(Path.class);
    when(mockedFs.create(createdPath.capture(), anyBoolean())).thenReturn(mockedOut);

    final CheckpointStreamFactory.CheckpointStateOutputStream checkpointStream =
        new FsCheckpointStreamFactory.FsCheckpointStateOutputStream(TEMP_DIR_PATH, mockedFs, 4, 0);

    // writing this payload is expected to make the stream create its underlying file
    final byte[] payload = { 1, 2, 3, 4, 5 };
    checkpointStream.write(payload);
    verify(mockedFs).create(any(Path.class), anyBoolean());

    // closing must remove exactly the file that was created before
    checkpointStream.close();
    final Path fileBehindStream = createdPath.getValue();
    verify(mockedFs).delete(eq(fileBehindStream), anyBoolean());
}
Also used : Path(org.apache.flink.core.fs.Path) FsCheckpointStateOutputStream(org.apache.flink.runtime.state.filesystem.FsCheckpointStreamFactory.FsCheckpointStateOutputStream) CheckpointStreamFactory(org.apache.flink.runtime.state.CheckpointStreamFactory) FileSystem(org.apache.flink.core.fs.FileSystem) FSDataOutputStream(org.apache.flink.core.fs.FSDataOutputStream) Test(org.junit.Test)

Example 40 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class FileMonitoringFunction method run.

/**
 * Polls the monitored path while {@code isRunning} is set, emitting one tuple
 * {@code (file path, start offset, end offset)} per file returned by {@code listNewFiles},
 * and sleeping for {@code interval} between polls.
 *
 * <p>For {@code ONLY_NEW_FILES} and {@code REPROCESS_WITH_APPENDED} the tuple is
 * {@code (path, 0, -1)}. For {@code PROCESS_ONLY_APPENDED} the tuple spans from the offset
 * last recorded for the file (0 if there is none) to the file's current length.
 *
 * @param ctx the context the tuples are emitted to
 * @throws Exception if the file system cannot be obtained, a file status cannot be read, or
 *         the sleep is interrupted
 */
@Override
public void run(SourceContext<Tuple3<String, Long, Long>> ctx) throws Exception {
    final FileSystem fs = FileSystem.get(new URI(path));
    while (isRunning) {
        final List<String> newFiles = listNewFiles(fs);
        for (String newFile : newFiles) {
            final boolean emitWholeFile = watchType == WatchType.ONLY_NEW_FILES || watchType == WatchType.REPROCESS_WITH_APPENDED;
            if (emitWholeFile) {
                ctx.collect(new Tuple3<String, Long, Long>(newFile, 0L, -1L));
                offsetOfFiles.put(newFile, -1L);
                continue;
            }
            if (watchType != WatchType.PROCESS_ONLY_APPENDED) {
                continue;
            }

            // emit only the range between the last recorded offset and the current file length
            final long currentSize = fs.getFileStatus(new Path(newFile)).getLen();
            final long startOffset = offsetOfFiles.containsKey(newFile) ? offsetOfFiles.get(newFile) : 0L;
            ctx.collect(new Tuple3<String, Long, Long>(newFile, startOffset, currentSize));
            offsetOfFiles.put(newFile, currentSize);
            LOG.info("File processed: {}, {}, {}", newFile, startOffset, currentSize);
        }
        Thread.sleep(interval);
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileSystem(org.apache.flink.core.fs.FileSystem) Tuple3(org.apache.flink.api.java.tuple.Tuple3) URI(java.net.URI)

Aggregations

FileSystem (org.apache.flink.core.fs.FileSystem): 41, Path (org.apache.flink.core.fs.Path): 34, IOException (java.io.IOException): 18, FileStatus (org.apache.flink.core.fs.FileStatus): 13, ArrayList (java.util.ArrayList): 8, Test (org.junit.Test): 8, FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream): 6, FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream): 6, File (java.io.File): 5, URI (java.net.URI): 5, URISyntaxException (java.net.URISyntaxException): 4, FileNotFoundException (java.io.FileNotFoundException): 3, FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 3, DataInputViewStreamWrapper (org.apache.flink.core.memory.DataInputViewStreamWrapper): 3, FileStateHandle (org.apache.flink.runtime.state.filesystem.FileStateHandle): 3, DataOutputStream (java.io.DataOutputStream): 2, InputStream (java.io.InputStream): 2, Field (java.lang.reflect.Field): 2, Map (java.util.Map): 2, FileBaseStatistics (org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics): 2