Example 11 with FileSystem

Use of org.apache.flink.core.fs.FileSystem in project flink by apache, from the run method of the ContinuousFileMonitoringFunction class.

@Override
public void run(SourceFunction.SourceContext<TimestampedFileInputSplit> context) throws Exception {
    Path p = new Path(path);
    FileSystem fileSystem = FileSystem.get(p.toUri());
    if (!fileSystem.exists(p)) {
        throw new FileNotFoundException("The provided file path " + path + " does not exist.");
    }
    checkpointLock = context.getCheckpointLock();
    switch(watchType) {
        case PROCESS_CONTINUOUSLY:
            while (isRunning) {
                synchronized (checkpointLock) {
                    monitorDirAndForwardSplits(fileSystem, context);
                }
                Thread.sleep(interval);
            }
            break;
        case PROCESS_ONCE:
            synchronized (checkpointLock) {
                if (globalModificationTime == Long.MIN_VALUE) {
                    monitorDirAndForwardSplits(fileSystem, context);
                    globalModificationTime = Long.MAX_VALUE;
                }
                isRunning = false;
            }
            break;
        default:
            isRunning = false;
            throw new RuntimeException("Unknown WatchType: " + watchType);
    }
}
Also used: Path (org.apache.flink.core.fs.Path) FileSystem (org.apache.flink.core.fs.FileSystem) FileNotFoundException (java.io.FileNotFoundException)
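
The FileSystem usage above boils down to a scheme-based lookup via FileSystem.get and an existence check before monitoring starts. Below is a minimal, self-contained sketch of that pattern (not taken from the Flink sources); the monitored path is a hypothetical placeholder.

import java.io.FileNotFoundException;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class MonitoredPathCheck {

    public static void main(String[] args) throws Exception {
        // Hypothetical path; in the function above it comes from the source's configuration.
        Path p = new Path("file:///tmp/monitored-dir");
        // Resolve the FileSystem implementation that matches the URI scheme of the path.
        FileSystem fileSystem = FileSystem.get(p.toUri());
        // Fail fast if the path does not exist, mirroring the check in run().
        if (!fileSystem.exists(p)) {
            throw new FileNotFoundException("The provided file path " + p + " does not exist.");
        }
        System.out.println("Path exists and can be monitored: " + p);
    }
}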

Example 12 with FileSystem

Use of org.apache.flink.core.fs.FileSystem in project flink by apache, from the testBlobServerRecovery method of the BlobRecoveryITCase class.

public static void testBlobServerRecovery(final Configuration config) throws IOException {
    final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
    String storagePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH) + "/" + clusterId;
    Random rand = new Random();
    BlobServer[] server = new BlobServer[2];
    InetSocketAddress[] serverAddress = new InetSocketAddress[2];
    BlobClient client = null;
    try {
        for (int i = 0; i < server.length; i++) {
            server[i] = new BlobServer(config);
            serverAddress[i] = new InetSocketAddress("localhost", server[i].getPort());
        }
        client = new BlobClient(serverAddress[0], config);
        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);
        BlobKey[] keys = new BlobKey[2];
        // Put data
        // Request 1
        keys[0] = client.put(expected);
        // Request 2
        keys[1] = client.put(expected, 32, 256);
        JobID[] jobId = new JobID[] { new JobID(), new JobID() };
        String[] testKey = new String[] { "test-key-1", "test-key-2" };
        // Request 3
        client.put(jobId[0], testKey[0], expected);
        // Request 4
        client.put(jobId[1], testKey[1], expected, 32, 256);
        // check that the storage directory exists
        final Path blobServerPath = new Path(storagePath, "blob");
        FileSystem fs = blobServerPath.getFileSystem();
        assertTrue("Unknown storage dir: " + blobServerPath, fs.exists(blobServerPath));
        // Close the client and connect to the other server
        client.close();
        client = new BlobClient(serverAddress[1], config);
        // Verify request 1
        try (InputStream is = client.get(keys[0])) {
            byte[] actual = new byte[expected.length];
            BlobUtils.readFully(is, actual, 0, expected.length, null);
            for (int i = 0; i < expected.length; i++) {
                assertEquals(expected[i], actual[i]);
            }
        }
        // Verify request 2
        try (InputStream is = client.get(keys[1])) {
            byte[] actual = new byte[256];
            BlobUtils.readFully(is, actual, 0, 256, null);
            for (int i = 32, j = 0; i < 256; i++, j++) {
                assertEquals(expected[i], actual[j]);
            }
        }
        // Verify request 3
        try (InputStream is = client.get(jobId[0], testKey[0])) {
            byte[] actual = new byte[expected.length];
            BlobUtils.readFully(is, actual, 0, expected.length, null);
            for (int i = 0; i < expected.length; i++) {
                assertEquals(expected[i], actual[i]);
            }
        }
        // Verify request 4
        try (InputStream is = client.get(jobId[1], testKey[1])) {
            byte[] actual = new byte[256];
            BlobUtils.readFully(is, actual, 0, 256, null);
            for (int i = 32, j = 0; i < 256; i++, j++) {
                assertEquals(expected[i], actual[j]);
            }
        }
        // Remove again
        client.delete(keys[0]);
        client.delete(keys[1]);
        client.delete(jobId[0], testKey[0]);
        client.delete(jobId[1], testKey[1]);
        // Verify everything is clean
        assertTrue("HA storage directory does not exist", fs.exists(new Path(storagePath)));
        if (fs.exists(blobServerPath)) {
            final org.apache.flink.core.fs.FileStatus[] recoveryFiles = fs.listStatus(blobServerPath);
            ArrayList<String> filenames = new ArrayList<String>(recoveryFiles.length);
            for (org.apache.flink.core.fs.FileStatus file : recoveryFiles) {
                filenames.add(file.toString());
            }
            fail("Unclean state backend: " + filenames);
        }
    } finally {
        for (BlobServer s : server) {
            if (s != null) {
                s.shutdown();
            }
        }
        if (client != null) {
            client.close();
        }
    }
}
Also used: InetSocketAddress (java.net.InetSocketAddress) ArrayList (java.util.ArrayList) Random (java.util.Random) FileSystem (org.apache.flink.core.fs.FileSystem) Path (org.apache.flink.core.fs.Path) InputStream (java.io.InputStream) JobID (org.apache.flink.api.common.JobID)
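
The FileSystem part of this test is the existence check on the HA blob directory and the listStatus scan that verifies the directory is clean after deletion. A minimal standalone sketch of that inspection follows (not part of the test above); the storage path is a hypothetical placeholder.

import java.util.ArrayList;
import java.util.List;
import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class BlobStorageInspection {

    public static void main(String[] args) throws Exception {
        // Hypothetical HA storage location; the test derives it from the configuration.
        Path blobServerPath = new Path("file:///tmp/ha-storage/blob");
        // Resolve the FileSystem from the path itself, as the test does.
        FileSystem fs = blobServerPath.getFileSystem();
        if (!fs.exists(blobServerPath)) {
            System.out.println("No blob storage directory at " + blobServerPath);
            return;
        }
        // List whatever is left under the directory, mirroring the cleanliness check.
        FileStatus[] remaining = fs.listStatus(blobServerPath);
        List<String> filenames = new ArrayList<>(remaining.length);
        for (FileStatus file : remaining) {
            filenames.add(file.getPath().getName());
        }
        System.out.println("Files under " + blobServerPath + ": " + filenames);
    }
}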

Example 13 with FileSystem

Use of org.apache.flink.core.fs.FileSystem in project flink by apache, from the testSavepointMigrationV0ToV1 method of the MigrationV0ToV1Test class.

/**
	 * Simple test of savepoint methods.
	 */
@Test
public void testSavepointMigrationV0ToV1() throws Exception {
    String target = tmp.getRoot().getAbsolutePath();
    assertEquals(0, tmp.getRoot().listFiles().length);
    long checkpointId = ThreadLocalRandom.current().nextLong(Integer.MAX_VALUE);
    int numTaskStates = 4;
    int numSubtaskStates = 16;
    Collection<org.apache.flink.migration.runtime.checkpoint.TaskState> expected = createTaskStatesOld(numTaskStates, numSubtaskStates);
    SavepointV0 savepoint = new SavepointV0(checkpointId, expected);
    assertEquals(SavepointV0.VERSION, savepoint.getVersion());
    assertEquals(checkpointId, savepoint.getCheckpointId());
    assertEquals(expected, savepoint.getOldTaskStates());
    assertFalse(savepoint.getOldTaskStates().isEmpty());
    Exception latestException = null;
    Path path = null;
    FSDataOutputStream fdos = null;
    FileSystem fs = null;
    try {
        // Try to create a FS output stream
        for (int attempt = 0; attempt < 10; attempt++) {
            path = new Path(target, FileUtils.getRandomFilename("savepoint-"));
            if (fs == null) {
                fs = FileSystem.get(path.toUri());
            }
            try {
                fdos = fs.create(path, false);
                break;
            } catch (Exception e) {
                latestException = e;
            }
        }
        if (fdos == null) {
            throw new IOException("Failed to create file output stream at " + path, latestException);
        }
        try (DataOutputStream dos = new DataOutputStream(fdos)) {
            dos.writeInt(SavepointStore.MAGIC_NUMBER);
            dos.writeInt(savepoint.getVersion());
            SavepointV0Serializer.INSTANCE.serializeOld(savepoint, dos);
        }
        ClassLoader cl = Thread.currentThread().getContextClassLoader();
        Savepoint sp = SavepointStore.loadSavepoint(path.toString(), cl);
        int t = 0;
        for (TaskState taskState : sp.getTaskStates()) {
            for (int p = 0; p < taskState.getParallelism(); ++p) {
                SubtaskState subtaskState = taskState.getState(p);
                ChainedStateHandle<StreamStateHandle> legacyOperatorState = subtaskState.getLegacyOperatorState();
                for (int c = 0; c < legacyOperatorState.getLength(); ++c) {
                    StreamStateHandle stateHandle = legacyOperatorState.get(c);
                    try (InputStream is = stateHandle.openInputStream()) {
                        Tuple4<Integer, Integer, Integer, Integer> expTestState = new Tuple4<>(0, t, p, c);
                        Tuple4<Integer, Integer, Integer, Integer> actTestState;
                        //check function state
                        if (p % 4 != 0) {
                            assertEquals(1, is.read());
                            actTestState = InstantiationUtil.deserializeObject(is, cl);
                            assertEquals(expTestState, actTestState);
                        } else {
                            assertEquals(0, is.read());
                        }
                        //check operator state
                        expTestState.f0 = 1;
                        actTestState = InstantiationUtil.deserializeObject(is, cl);
                        assertEquals(expTestState, actTestState);
                    }
                }
                //check keyed state
                KeyGroupsStateHandle keyGroupsStateHandle = subtaskState.getManagedKeyedState();
                if (t % 3 != 0) {
                    assertEquals(1, keyGroupsStateHandle.getNumberOfKeyGroups());
                    assertEquals(p, keyGroupsStateHandle.getGroupRangeOffsets().getKeyGroupRange().getStartKeyGroup());
                    ByteStreamStateHandle stateHandle = (ByteStreamStateHandle) keyGroupsStateHandle.getDelegateStateHandle();
                    HashMap<String, KvStateSnapshot<?, ?, ?, ?>> testKeyedState = MigrationInstantiationUtil.deserializeObject(stateHandle.getData(), cl);
                    assertEquals(2, testKeyedState.size());
                    for (KvStateSnapshot<?, ?, ?, ?> snapshot : testKeyedState.values()) {
                        MemValueState.Snapshot<?, ?, ?> castedSnapshot = (MemValueState.Snapshot<?, ?, ?>) snapshot;
                        byte[] data = castedSnapshot.getData();
                        assertEquals(t, data[0]);
                        assertEquals(p, data[1]);
                    }
                } else {
                    assertEquals(null, keyGroupsStateHandle);
                }
            }
            ++t;
        }
        savepoint.dispose();
    } finally {
        // Dispose
        SavepointStore.removeSavepointFile(path.toString());
    }
}
Also used: FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream) DataOutputStream (java.io.DataOutputStream) ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle) KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle) StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle) SavepointV0 (org.apache.flink.migration.runtime.checkpoint.savepoint.SavepointV0) FileSystem (org.apache.flink.core.fs.FileSystem) Path (org.apache.flink.core.fs.Path) InputStream (java.io.InputStream) MemValueState (org.apache.flink.migration.runtime.state.memory.MemValueState) IOException (java.io.IOException) KvStateSnapshot (org.apache.flink.migration.runtime.state.KvStateSnapshot) Tuple4 (org.apache.flink.api.java.tuple.Tuple4) SubtaskState (org.apache.flink.runtime.checkpoint.SubtaskState) StreamTaskState (org.apache.flink.migration.streaming.runtime.tasks.StreamTaskState) TaskState (org.apache.flink.runtime.checkpoint.TaskState) Test (org.junit.Test)
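
The FileSystem-specific step in this test is obtaining an FSDataOutputStream via FileSystem.create and wrapping it in a DataOutputStream to write the savepoint header. Below is a minimal sketch of that write path (not from the test above); the directory, magic number, and version value are hypothetical placeholders.

import java.io.DataOutputStream;
import org.apache.flink.core.fs.FSDataOutputStream;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class SavepointHeaderWrite {

    public static void main(String[] args) throws Exception {
        // Hypothetical target file; the test picks a random file name under a temp folder.
        Path path = new Path("file:///tmp/savepoints/savepoint-example");
        FileSystem fs = FileSystem.get(path.toUri());
        // Make sure the parent directory exists before creating the file.
        fs.mkdirs(path.getParent());
        // Create the file, failing if it already exists (overwrite = false), as in the test.
        FSDataOutputStream fdos = fs.create(path, false);
        try (DataOutputStream dos = new DataOutputStream(fdos)) {
            dos.writeInt(0xCAFEBABE); // placeholder magic number, not Flink's actual constant
            dos.writeInt(1);          // placeholder version
        }
        System.out.println("Wrote savepoint header to " + path);
    }
}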

Example 14 with FileSystem

Use of org.apache.flink.core.fs.FileSystem in project flink by apache, from the testStoreExternalizedCheckpointsToSameDirectory method of the SavepointStoreTest class.

/**
	 * Tests that multiple externalized checkpoints can be stored to the same
	 * directory.
	 */
@Test
public void testStoreExternalizedCheckpointsToSameDirectory() throws Exception {
    String root = tmp.newFolder().getAbsolutePath();
    FileSystem fs = FileSystem.get(new Path(root).toUri());
    // Store
    SavepointV1 savepoint = new SavepointV1(1929292, SavepointV1Test.createTaskStates(4, 24));
    FileStateHandle store1 = SavepointStore.storeExternalizedCheckpointToHandle(root, savepoint);
    assertTrue(fs.exists(store1.getFilePath()));
    assertTrue(store1.getFilePath().getPath().contains(SavepointStore.EXTERNALIZED_CHECKPOINT_METADATA_FILE));
    FileStateHandle store2 = SavepointStore.storeExternalizedCheckpointToHandle(root, savepoint);
    assertTrue(fs.exists(store2.getFilePath()));
    assertTrue(store2.getFilePath().getPath().contains(SavepointStore.EXTERNALIZED_CHECKPOINT_METADATA_FILE));
}
Also used: Path (org.apache.flink.core.fs.Path) FileSystem (org.apache.flink.core.fs.FileSystem) FileStateHandle (org.apache.flink.runtime.state.filesystem.FileStateHandle) Test (org.junit.Test)
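
Here the FileSystem is resolved from the root path's URI and used to check that the stored metadata files exist. A minimal sketch of that lookup-and-check pattern follows (not part of the test above); the directory and file name are hypothetical placeholders.

import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class StoredCheckpointCheck {

    public static void main(String[] args) throws Exception {
        // Hypothetical root directory for externalized checkpoints.
        Path root = new Path("file:///tmp/externalized-checkpoints");
        FileSystem fs = FileSystem.get(root.toUri());
        // Hypothetical metadata file name; the real name is produced by SavepointStore.
        Path metadataFile = new Path(root, "checkpoint-metadata-example");
        // Act on the boolean returned by exists() rather than discarding it.
        if (fs.exists(metadataFile)) {
            System.out.println("Metadata file found: " + metadataFile);
        } else {
            System.out.println("Metadata file missing: " + metadataFile);
        }
    }
}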

Example 15 with FileSystem

Use of org.apache.flink.core.fs.FileSystem in project flink by apache, from the createInputSplits method of the BinaryInputFormat class.

@Override
public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    List<FileStatus> files = this.getFiles();
    final FileSystem fs = this.filePath.getFileSystem();
    final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize;
    final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits);
    for (FileStatus file : files) {
        for (long pos = 0, length = file.getLen(); pos < length; pos += blockSize) {
            long remainingLength = Math.min(pos + blockSize, length) - pos;
            // get the block locations and make sure they are in order with respect to their offset
            final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength);
            Arrays.sort(blocks);
            inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts()));
        }
    }
    if (inputSplits.size() < minNumSplits) {
        LOG.warn(String.format("With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits));
        FileStatus last = files.get(files.size() - 1);
        final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen());
        for (int index = files.size(); index < minNumSplits; index++) {
            inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts()));
        }
    }
    return inputSplits.toArray(new FileInputSplit[inputSplits.size()]);
}
Also used: FileInputSplit (org.apache.flink.core.fs.FileInputSplit) FileStatus (org.apache.flink.core.fs.FileStatus) FileSystem (org.apache.flink.core.fs.FileSystem) ArrayList (java.util.ArrayList) BlockLocation (org.apache.flink.core.fs.BlockLocation)
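
The split computation above relies on three FileSystem calls: getDefaultBlockSize, the FileStatus of the input file, and getFileBlockLocations for each block range. A minimal standalone sketch of those calls follows (not from BinaryInputFormat); the input file path is a hypothetical placeholder and is assumed to exist.

import java.util.Arrays;
import org.apache.flink.core.fs.BlockLocation;
import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class BlockLocationLookup {

    public static void main(String[] args) throws Exception {
        // Hypothetical input file; must already exist for getFileStatus to succeed.
        Path filePath = new Path("file:///tmp/data/input.bin");
        FileSystem fs = filePath.getFileSystem();
        // Use the file system's default block size, as the format does for NATIVE_BLOCK_SIZE.
        long blockSize = fs.getDefaultBlockSize();
        FileStatus status = fs.getFileStatus(filePath);
        // Fetch the locations of the first block and order them by offset, mirroring createInputSplits.
        long firstBlockLength = Math.min(blockSize, status.getLen());
        BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, firstBlockLength);
        Arrays.sort(blocks);
        System.out.println("Hosts serving the first block: " + Arrays.toString(blocks[0].getHosts()));
    }
}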

Aggregations

FileSystem (org.apache.flink.core.fs.FileSystem): 41 usages
Path (org.apache.flink.core.fs.Path): 34 usages
IOException (java.io.IOException): 18 usages
FileStatus (org.apache.flink.core.fs.FileStatus): 13 usages
ArrayList (java.util.ArrayList): 8 usages
Test (org.junit.Test): 8 usages
FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream): 6 usages
FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream): 6 usages
File (java.io.File): 5 usages
URI (java.net.URI): 5 usages
URISyntaxException (java.net.URISyntaxException): 4 usages
FileNotFoundException (java.io.FileNotFoundException): 3 usages
FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 3 usages
DataInputViewStreamWrapper (org.apache.flink.core.memory.DataInputViewStreamWrapper): 3 usages
FileStateHandle (org.apache.flink.runtime.state.filesystem.FileStateHandle): 3 usages
DataOutputStream (java.io.DataOutputStream): 2 usages
InputStream (java.io.InputStream): 2 usages
Field (java.lang.reflect.Field): 2 usages
Map (java.util.Map): 2 usages
FileBaseStatistics (org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics): 2 usages