
Example 86 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class AvroParquetRecordFormatTest method createReader.

// ------------------------------------------------------------------------
// helper methods
// ------------------------------------------------------------------------
private <T> StreamFormat.Reader<T> createReader(AvroParquetRecordFormat<T> format, Configuration config, Path filePath, long splitOffset, long splitLength) throws IOException {
    final FileSystem fileSystem = filePath.getFileSystem();
    final FileStatus fileStatus = fileSystem.getFileStatus(filePath);
    final FSDataInputStream inputStream = fileSystem.open(filePath);
    if (format.isSplittable()) {
        inputStream.seek(splitOffset);
    } else {
        inputStream.seek(0);
        checkArgument(splitLength == fileStatus.getLen());
    }
    return format.createReader(config, inputStream, fileStatus.getLen(), splitOffset + splitLength);
}
Also used : FileStatus(org.apache.flink.core.fs.FileStatus) FileSystem(org.apache.flink.core.fs.FileSystem) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream)
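The returned StreamFormat.Reader is typically drained by calling read() until it returns null. A minimal, hypothetical usage sketch of the helper above (the format instance, file path, and Avro GenericRecord element type are placeholders, not taken from the test):

// Hypothetical usage of the helper above: read all records of one split.
// "format" stands for some pre-built AvroParquetRecordFormat<GenericRecord>.
final Path filePath = new Path("file:///tmp/data.parquet");
final long fileLength = filePath.getFileSystem().getFileStatus(filePath).getLen();
final List<GenericRecord> records = new ArrayList<>();
try (StreamFormat.Reader<GenericRecord> reader =
        createReader(format, new Configuration(), filePath, 0L, fileLength)) {
    GenericRecord record;
    while ((record = reader.read()) != null) {
        records.add(record);
    }
}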

Example 87 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class DistributedCacheDfsTest method setup.

@BeforeClass
public static void setup() throws Exception {
    // Spin up an in-process HDFS mini cluster backed by a temporary directory.
    File dataDir = TEMP_FOLDER.newFolder();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, dataDir.getAbsolutePath());
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    hdfsCluster = builder.build();
    String hdfsURI = "hdfs://" + NetUtils.hostAndPortToUrlString(hdfsCluster.getURI().getHost(), hdfsCluster.getNameNodePort()) + "/";
    // Obtain a Flink FileSystem for the cluster and create the file and directory used by the tests.
    FileSystem dfs = FileSystem.get(new URI(hdfsURI));
    testFile = writeFile(dfs, dfs.getHomeDirectory(), "testFile");
    testDir = new Path(dfs.getHomeDirectory(), "testDir");
    dfs.mkdirs(testDir);
    writeFile(dfs, testDir, "testFile1");
    writeFile(dfs, testDir, "testFile2");
}
Also used : Path(org.apache.flink.core.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) FileSystem(org.apache.flink.core.fs.FileSystem) File(java.io.File) URI(java.net.URI) BeforeClass(org.junit.BeforeClass)
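The writeFile helper called above is not part of this snippet. A minimal sketch of what such a helper could look like, assuming the tests only need a small file with fixed contents at the given location:

// Hypothetical helper: create a small file with fixed contents and return its path.
private static Path writeFile(FileSystem fs, Path parent, String name) throws IOException {
    final Path file = new Path(parent, name);
    try (FSDataOutputStream out = fs.create(file, FileSystem.WriteMode.OVERWRITE)) {
        out.write("test data".getBytes(StandardCharsets.UTF_8));
    }
    return file;
}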

Example 88 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class FileMonitoringFunction method run.

@Override
public void run(SourceContext<Tuple3<String, Long, Long>> ctx) throws Exception {
    FileSystem fileSystem = FileSystem.get(new URI(path));
    while (isRunning) {
        List<String> files = listNewFiles(fileSystem);
        for (String filePath : files) {
            if (watchType == WatchType.ONLY_NEW_FILES || watchType == WatchType.REPROCESS_WITH_APPENDED) {
                // New or re-detected file: emit it with offset 0 and length -1.
                ctx.collect(new Tuple3<String, Long, Long>(filePath, 0L, -1L));
                offsetOfFiles.put(filePath, -1L);
            } else if (watchType == WatchType.PROCESS_ONLY_APPENDED) {
                // Appended data only: emit from the last known offset up to the current file size.
                long offset = 0;
                long fileSize = fileSystem.getFileStatus(new Path(filePath)).getLen();
                if (offsetOfFiles.containsKey(filePath)) {
                    offset = offsetOfFiles.get(filePath);
                }
                ctx.collect(new Tuple3<String, Long, Long>(filePath, offset, fileSize));
                offsetOfFiles.put(filePath, fileSize);
                LOG.info("File processed: {}, {}, {}", filePath, offset, fileSize);
            }
        }
        Thread.sleep(interval);
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileSystem(org.apache.flink.core.fs.FileSystem) Tuple3(org.apache.flink.api.java.tuple.Tuple3) URI(java.net.URI)
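listNewFiles is not shown in this snippet. A rough sketch of such a helper, assuming it reports files whose modification time is newer than the last one seen (the modificationTimes field is illustrative):

// Hypothetical helper: list files under the watched path that are new or were
// modified since the last scan, tracking the newest modification time per file.
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
    List<String> files = new ArrayList<>();
    FileStatus[] statuses = fileSystem.listStatus(new Path(path));
    if (statuses != null) {
        for (FileStatus status : statuses) {
            String filePath = status.getPath().toString();
            Long lastSeen = modificationTimes.get(filePath);
            if (lastSeen == null || status.getModificationTime() > lastSeen) {
                files.add(filePath);
                modificationTimes.put(filePath, status.getModificationTime());
            }
        }
    }
    return files;
}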

Example 89 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class FsJobArchivist method archiveJob.

/**
 * Writes the given {@link AccessExecutionGraph} to the {@link FileSystem} pointed to by {@link
 * JobManagerOptions#ARCHIVE_DIR}.
 *
 * @param rootPath directory to which the archive should be written
 * @param jobId id of the job being archived
 * @param jsonToArchive collection of JSON-path pairs that should be archived
 * @return path to where the archive was written, or null if no archive was created
 * @throws IOException
 */
public static Path archiveJob(Path rootPath, JobID jobId, Collection<ArchivedJson> jsonToArchive) throws IOException {
    try {
        FileSystem fs = rootPath.getFileSystem();
        Path path = new Path(rootPath, jobId.toString());
        OutputStream out = fs.create(path, FileSystem.WriteMode.NO_OVERWRITE);
        try (JsonGenerator gen = jacksonFactory.createGenerator(out, JsonEncoding.UTF8)) {
            gen.writeStartObject();
            gen.writeArrayFieldStart(ARCHIVE);
            for (ArchivedJson archive : jsonToArchive) {
                gen.writeStartObject();
                gen.writeStringField(PATH, archive.getPath());
                gen.writeStringField(JSON, archive.getJson());
                gen.writeEndObject();
            }
            gen.writeEndArray();
            gen.writeEndObject();
        } catch (Exception e) {
            fs.delete(path, false);
            throw e;
        }
        LOG.info("Job {} has been archived at {}.", jobId, path);
        return path;
    } catch (IOException e) {
        LOG.error("Failed to archive job.", e);
        throw e;
    }
}
Also used : Path(org.apache.flink.core.fs.Path) ArchivedJson(org.apache.flink.runtime.webmonitor.history.ArchivedJson) FileSystem(org.apache.flink.core.fs.FileSystem) OutputStream(java.io.OutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) JsonGenerator(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonGenerator) IOException(java.io.IOException)
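A hedged sketch of how archiveJob might be called; the archive directory and the JSON payloads are made up for illustration and are not taken from the Flink sources:

// Hypothetical call site: archive two pre-rendered REST responses for a finished job.
Path archiveDir = new Path("file:///tmp/flink-history");
JobID jobId = new JobID();
Collection<ArchivedJson> toArchive = Arrays.asList(
        new ArchivedJson("/jobs/" + jobId, "{\"state\":\"FINISHED\"}"),
        new ArchivedJson("/jobs/" + jobId + "/config", "{}"));
Path archiveFile = FsJobArchivist.archiveJob(archiveDir, jobId, toArchive);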

Example 90 with FileSystem

use of org.apache.flink.core.fs.FileSystem in project flink by apache.

the class FileCacheDirectoriesTest method testDirectoryCleanUp.

@Test
public void testDirectoryCleanUp() throws Exception {
    JobID jobID = new JobID();
    ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
    ExecutionAttemptID attemptID2 = new ExecutionAttemptID();
    final String fileName = "test_file";
    // copy / create the file
    final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry(fileName, false, InstantiationUtil.serializeObject(permanentBlobKey), true);
    Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID1);
    fileCache.createTmpFile(fileName, entry, jobID, attemptID2);
    final Path dstPath = copyResult.get();
    final FileSystem fs = dstPath.getFileSystem();
    final FileStatus fileStatus = fs.getFileStatus(dstPath);
    final Path cacheFile = new Path(dstPath, "cacheFile");
    assertTrue(fileStatus.isDir());
    assertTrue(fs.exists(cacheFile));
    fileCache.releaseJob(jobID, attemptID1);
    // still should be available
    assertTrue(fileStatus.isDir());
    assertTrue(fs.exists(cacheFile));
    fileCache.releaseJob(jobID, attemptID2);
    // still should be available, file will be deleted after cleanupInterval
    assertTrue(fileStatus.isDir());
    assertTrue(fs.exists(cacheFile));
    // after a while, the file should disappear
    assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis);
    executorService.lastDeleteProcess.run();
    assertFalse(fs.exists(dstPath));
    assertFalse(fs.exists(cacheFile));
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) FileStatus(org.apache.flink.core.fs.FileStatus) DistributedCache(org.apache.flink.api.common.cache.DistributedCache) FileSystem(org.apache.flink.core.fs.FileSystem) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

FileSystem (org.apache.flink.core.fs.FileSystem): 102 usages
Path (org.apache.flink.core.fs.Path): 80 usages
Test (org.junit.Test): 49 usages
IOException (java.io.IOException): 28 usages
File (java.io.File): 24 usages
FileStatus (org.apache.flink.core.fs.FileStatus): 20 usages
FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream): 18 usages
FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream): 14 usages
URI (java.net.URI): 13 usages
LocalFileSystem (org.apache.flink.core.fs.local.LocalFileSystem): 13 usages
ArrayList (java.util.ArrayList): 10 usages
Random (java.util.Random): 8 usages
Configuration (org.apache.flink.configuration.Configuration): 8 usages
JobID (org.apache.flink.api.common.JobID): 7 usages
FileNotFoundException (java.io.FileNotFoundException): 5 usages
StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle): 5 usages
InputStream (java.io.InputStream): 4 usages
URISyntaxException (java.net.URISyntaxException): 4 usages
FileBaseStatistics (org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics): 4 usages
FsCheckpointStateOutputStream (org.apache.flink.runtime.state.filesystem.FsCheckpointStreamFactory.FsCheckpointStateOutputStream): 4 usages