Example 36 with FSDataInputStream

use of org.apache.flink.core.fs.FSDataInputStream in project flink by apache.

the class HeapKeyedStateBackend method restorePartitionedState.

@SuppressWarnings({ "unchecked" })
private void restorePartitionedState(Collection<KeyGroupsStateHandle> state) throws Exception {
    final Map<Integer, String> kvStatesById = new HashMap<>();
    int numRegisteredKvStates = 0;
    stateTables.clear();
    for (KeyGroupsStateHandle keyGroupsHandle : state) {
        if (keyGroupsHandle == null) {
            continue;
        }
        FSDataInputStream fsDataInputStream = keyGroupsHandle.openInputStream();
        cancelStreamRegistry.registerClosable(fsDataInputStream);
        try {
            DataInputViewStreamWrapper inView = new DataInputViewStreamWrapper(fsDataInputStream);
            KeyedBackendSerializationProxy serializationProxy = new KeyedBackendSerializationProxy(userCodeClassLoader);
            serializationProxy.read(inView);
            List<KeyedBackendSerializationProxy.StateMetaInfo<?, ?>> metaInfoList = serializationProxy.getNamedStateSerializationProxies();
            for (KeyedBackendSerializationProxy.StateMetaInfo<?, ?> metaInfoSerializationProxy : metaInfoList) {
                StateTable<K, ?, ?> stateTable = stateTables.get(metaInfoSerializationProxy.getStateName());
                // important: only create a new table if we did not already create it previously
                if (null == stateTable) {
                    RegisteredBackendStateMetaInfo<?, ?> registeredBackendStateMetaInfo = new RegisteredBackendStateMetaInfo<>(metaInfoSerializationProxy);
                    stateTable = newStateTable(registeredBackendStateMetaInfo);
                    stateTables.put(metaInfoSerializationProxy.getStateName(), stateTable);
                    kvStatesById.put(numRegisteredKvStates, metaInfoSerializationProxy.getStateName());
                    ++numRegisteredKvStates;
                }
            }
            for (Tuple2<Integer, Long> groupOffset : keyGroupsHandle.getGroupRangeOffsets()) {
                int keyGroupIndex = groupOffset.f0;
                long offset = groupOffset.f1;
                fsDataInputStream.seek(offset);
                int writtenKeyGroupIndex = inView.readInt();
                Preconditions.checkState(writtenKeyGroupIndex == keyGroupIndex, "Unexpected key-group in restore.");
                for (int i = 0; i < metaInfoList.size(); i++) {
                    int kvStateId = inView.readShort();
                    StateTable<K, ?, ?> stateTable = stateTables.get(kvStatesById.get(kvStateId));
                    StateTableByKeyGroupReader keyGroupReader = StateTableByKeyGroupReaders.readerForVersion(stateTable, serializationProxy.getRestoredVersion());
                    keyGroupReader.readMappingsInKeyGroup(inView, keyGroupIndex);
                }
            }
        } finally {
            cancelStreamRegistry.unregisterClosable(fsDataInputStream);
            IOUtils.closeQuietly(fsDataInputStream);
        }
    }
}
Also used : RegisteredBackendStateMetaInfo(org.apache.flink.runtime.state.RegisteredBackendStateMetaInfo) HashMap(java.util.HashMap) KeyedBackendSerializationProxy(org.apache.flink.runtime.state.KeyedBackendSerializationProxy) DataInputViewStreamWrapper(org.apache.flink.core.memory.DataInputViewStreamWrapper) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream)
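
The method above interleaves metadata handling with per-key-group reads, but the stream handling itself follows a small, reusable pattern: open the handle's FSDataInputStream, wrap it in a DataInputViewStreamWrapper, seek to each key-group offset, and always close the stream. The following is a minimal, hypothetical sketch of just that pattern (the readKeyGroups name and signature are made up for illustration):

private static void readKeyGroups(KeyGroupsStateHandle handle) throws Exception {
    // try-with-resources stands in for the explicit IOUtils.closeQuietly in the finally block above
    try (FSDataInputStream in = handle.openInputStream()) {
        DataInputViewStreamWrapper view = new DataInputViewStreamWrapper(in);
        for (Tuple2<Integer, Long> groupOffset : handle.getGroupRangeOffsets()) {
            // each entry maps a key-group index (f0) to its byte offset in the stream (f1)
            in.seek(groupOffset.f1);
            int writtenKeyGroupIndex = view.readInt();
            Preconditions.checkState(writtenKeyGroupIndex == groupOffset.f0, "Unexpected key-group in restore.");
            // ... read the per-state mappings for this key group here ...
        }
    }
}

The production method additionally registers the stream with cancelStreamRegistry before reading and unregisters it in the finally block, so a job cancellation can close a stream that is blocked in a read.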

Example 37 with FSDataInputStream

use of org.apache.flink.core.fs.FSDataInputStream in project flink by apache.

the class DistCp method main.

public static void main(String[] args) throws Exception {
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    ParameterTool params = ParameterTool.fromArgs(args);
    if (!params.has("input") || !params.has("output")) {
        System.err.println("Usage: --input <path> --output <path> [--parallelism <n>]");
        return;
    }
    final Path sourcePath = new Path(params.get("input"));
    final Path targetPath = new Path(params.get("output"));
    if (!isLocal(env) && !(isOnDistributedFS(sourcePath) && isOnDistributedFS(targetPath))) {
        System.out.println("In a distributed mode only HDFS input/output paths are supported");
        return;
    }
    final int parallelism = params.getInt("parallelism", 10);
    if (parallelism <= 0) {
        System.err.println("Parallelism should be greater than 0");
        return;
    }
    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);
    env.setParallelism(parallelism);
    long startTime = System.currentTimeMillis();
    LOGGER.info("Initializing copy tasks");
    List<FileCopyTask> tasks = getCopyTasks(sourcePath);
    LOGGER.info("Copy task initialization took " + (System.currentTimeMillis() - startTime) + "ms");
    DataSet<FileCopyTask> inputTasks = new DataSource<>(env, new FileCopyTaskInputFormat(tasks), new GenericTypeInfo<>(FileCopyTask.class), "fileCopyTasks");
    FlatMapOperator<FileCopyTask, Object> res = inputTasks.flatMap(new RichFlatMapFunction<FileCopyTask, Object>() {

        private static final long serialVersionUID = 1109254230243989929L;

        private LongCounter fileCounter;

        private LongCounter bytesCounter;

        @Override
        public void open(Configuration parameters) throws Exception {
            bytesCounter = getRuntimeContext().getLongCounter(BYTES_COPIED_CNT_NAME);
            fileCounter = getRuntimeContext().getLongCounter(FILES_COPIED_CNT_NAME);
        }

        @Override
        public void flatMap(FileCopyTask task, Collector<Object> out) throws Exception {
            LOGGER.info("Processing task: " + task);
            Path outPath = new Path(targetPath, task.getRelativePath());
            FileSystem targetFs = targetPath.getFileSystem();
            // creating parent folders in case of a local FS
            if (!targetFs.isDistributedFS()) {
                // dealing with cases like file:///tmp or just /tmp
                File outFile = outPath.toUri().isAbsolute() ? new File(outPath.toUri()) : new File(outPath.toString());
                File parentFile = outFile.getParentFile();
                if (!parentFile.mkdirs() && !parentFile.exists()) {
                    throw new RuntimeException("Cannot create local file system directories: " + parentFile);
                }
            }
            FSDataOutputStream outputStream = null;
            FSDataInputStream inputStream = null;
            try {
                outputStream = targetFs.create(outPath, FileSystem.WriteMode.OVERWRITE);
                inputStream = task.getPath().getFileSystem().open(task.getPath());
                int bytes = IOUtils.copy(inputStream, outputStream);
                bytesCounter.add(bytes);
            } finally {
                IOUtils.closeQuietly(inputStream);
                IOUtils.closeQuietly(outputStream);
            }
            fileCounter.add(1L);
        }
    });
    // no data sinks are needed, therefore just printing an empty result
    res.print();
    Map<String, Object> accumulators = env.getLastJobExecutionResult().getAllAccumulatorResults();
    LOGGER.info("== COUNTERS ==");
    for (Map.Entry<String, Object> e : accumulators.entrySet()) {
        LOGGER.info(e.getKey() + ": " + e.getValue());
    }
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) LongCounter(org.apache.flink.api.common.accumulators.LongCounter) FileSystem(org.apache.flink.core.fs.FileSystem) FSDataOutputStream(org.apache.flink.core.fs.FSDataOutputStream) Path(org.apache.flink.core.fs.Path) IOException(java.io.IOException) DataSource(org.apache.flink.api.java.operators.DataSource) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) File(java.io.File) Map(java.util.Map)
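
The core of the flatMap above is a straight stream-to-stream copy through Flink's FileSystem abstraction. The following is a minimal, hypothetical sketch of just that copy step in isolation (the copyFile name is made up, and a manual buffer loop stands in for the commons-io IOUtils.copy call used above):

static long copyFile(Path source, Path target) throws IOException {
    FileSystem sourceFs = source.getFileSystem();
    FileSystem targetFs = target.getFileSystem();
    long bytesCopied = 0;
    // open source and target through the Flink FileSystem API; try-with-resources
    // replaces the explicit IOUtils.closeQuietly calls in the finally block above
    try (FSDataInputStream in = sourceFs.open(source);
        FSDataOutputStream out = targetFs.create(target, FileSystem.WriteMode.OVERWRITE)) {
        byte[] buffer = new byte[4096];
        int read;
        while ((read = in.read(buffer)) != -1) {
            out.write(buffer, 0, read);
            bytesCopied += read;
        }
    }
    return bytesCopied;
}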

Example 38 with FSDataInputStream

use of org.apache.flink.core.fs.FSDataInputStream in project flink by apache.

the class AbstractHadoopFileSystemITTest method testSimpleFileWriteAndRead.

@Test
public void testSimpleFileWriteAndRead() throws Exception {
    final String testLine = "Hello Upload!";
    final Path path = new Path(basePath, "test.txt");
    try {
        try (FSDataOutputStream out = fs.create(path, FileSystem.WriteMode.OVERWRITE);
            OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
            writer.write(testLine);
        }
        // just in case, wait for the path to exist
        checkPathExistence(path, true, consistencyToleranceNS);
        try (FSDataInputStream in = fs.open(path);
            InputStreamReader ir = new InputStreamReader(in, StandardCharsets.UTF_8);
            BufferedReader reader = new BufferedReader(ir)) {
            String line = reader.readLine();
            assertEquals(testLine, line);
        }
    } finally {
        fs.delete(path, false);
    }
    checkPathExistence(path, false, consistencyToleranceNS);
}
Also used : Path(org.apache.flink.core.fs.Path) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream) OutputStreamWriter(java.io.OutputStreamWriter) FSDataOutputStream(org.apache.flink.core.fs.FSDataOutputStream) Test(org.junit.Test)
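
checkPathExistence is a test helper (not shown here) that tolerates eventually consistent file systems: after the write the path may not be visible immediately, and after the delete it may linger briefly. A hypothetical sketch, assuming the helper simply polls FileSystem#exists until the expected state is observed or the tolerance in nanoseconds runs out (the real implementation in AbstractHadoopFileSystemITTest may differ):

private static void checkPathExistence(FileSystem fs, Path path, boolean expectedExists, long toleranceNS)
        throws IOException, InterruptedException {
    final long deadline = System.nanoTime() + toleranceNS;
    // poll until the file system reports the expected state or the tolerance window closes
    while (fs.exists(path) != expectedExists) {
        if (System.nanoTime() > deadline) {
            throw new AssertionError("Path " + path + " expected to " + (expectedExists ? "exist" : "not exist"));
        }
        Thread.sleep(50L);
    }
}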

Example 39 with FSDataInputStream

use of org.apache.flink.core.fs.FSDataInputStream in project flink by apache.

the class HadoopRecoverableWriterOldHadoopWithNoTruncateSupportTest method verifyFileContent.

private static void verifyFileContent(final Path testPath, final String expectedContent) throws IOException {
    try (FSDataInputStream in = fileSystem.open(testPath);
        InputStreamReader ir = new InputStreamReader(in, UTF_8);
        BufferedReader reader = new BufferedReader(ir)) {
        final String line = reader.readLine();
        assertEquals(expectedContent, line);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream)

Example 40 with FSDataInputStream

use of org.apache.flink.core.fs.FSDataInputStream in project flink by apache.

the class HadoopViewFileSystemTruncateTest method verifyFileContent.

private static void verifyFileContent(final org.apache.flink.core.fs.Path testPath, final String expectedContent) throws IOException {
    try (FSDataInputStream in = fSystem.open(testPath);
        InputStreamReader ir = new InputStreamReader(in, UTF_8);
        BufferedReader reader = new BufferedReader(ir)) {
        final String line = reader.readLine();
        assertEquals(expectedContent, line);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FSDataInputStream(org.apache.flink.core.fs.FSDataInputStream)
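
Examples 39 and 40 are the same helper duplicated in two test classes; only the FileSystem field they read from differs. A hypothetical shared version, with the FileSystem passed in explicitly and StandardCharsets.UTF_8 spelled out instead of the static import, could look like this:

private static void verifyFileContent(FileSystem fs, Path testPath, String expectedContent) throws IOException {
    // open the file through the given FileSystem and compare its first line to the expected content
    try (FSDataInputStream in = fs.open(testPath);
        InputStreamReader ir = new InputStreamReader(in, StandardCharsets.UTF_8);
        BufferedReader reader = new BufferedReader(ir)) {
        assertEquals(expectedContent, reader.readLine());
    }
}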

Aggregations

FSDataInputStream (org.apache.flink.core.fs.FSDataInputStream): 58
Test (org.junit.Test): 21
DataInputViewStreamWrapper (org.apache.flink.core.memory.DataInputViewStreamWrapper): 14
IOException (java.io.IOException): 12
FileSystem (org.apache.flink.core.fs.FileSystem): 12
Path (org.apache.flink.core.fs.Path): 10
FSDataOutputStream (org.apache.flink.core.fs.FSDataOutputStream): 8
HashMap (java.util.HashMap): 6
Map (java.util.Map): 6
FileStatus (org.apache.flink.core.fs.FileStatus): 6
KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle): 6
BufferedReader (java.io.BufferedReader): 5
InputStreamReader (java.io.InputStreamReader): 5
File (java.io.File): 4
DataInputView (org.apache.flink.core.memory.DataInputView): 4
StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle): 4
ObjectInputStream (java.io.ObjectInputStream): 3
OutputStreamWriter (java.io.OutputStreamWriter): 2
ArrayList (java.util.ArrayList): 2
LocalFileSystem (org.apache.flink.core.fs.local.LocalFileSystem): 2