Use of org.apache.flink.core.fs.FSDataInputStream in project flink by apache.
The class HeapKeyedStateBackend, method restorePartitionedState.
@SuppressWarnings({ "unchecked" })
private void restorePartitionedState(Collection<KeyGroupsStateHandle> state) throws Exception {
    final Map<Integer, String> kvStatesById = new HashMap<>();
    int numRegisteredKvStates = 0;
    stateTables.clear();
    for (KeyGroupsStateHandle keyGroupsHandle : state) {
        if (keyGroupsHandle == null) {
            continue;
        }
        FSDataInputStream fsDataInputStream = keyGroupsHandle.openInputStream();
        cancelStreamRegistry.registerClosable(fsDataInputStream);
        try {
            DataInputViewStreamWrapper inView = new DataInputViewStreamWrapper(fsDataInputStream);
            KeyedBackendSerializationProxy serializationProxy = new KeyedBackendSerializationProxy(userCodeClassLoader);
            serializationProxy.read(inView);
            List<KeyedBackendSerializationProxy.StateMetaInfo<?, ?>> metaInfoList =
                serializationProxy.getNamedStateSerializationProxies();
            for (KeyedBackendSerializationProxy.StateMetaInfo<?, ?> metaInfoSerializationProxy : metaInfoList) {
                StateTable<K, ?, ?> stateTable = stateTables.get(metaInfoSerializationProxy.getStateName());
                // important: only create a new table if we did not already create it previously
                if (null == stateTable) {
                    RegisteredBackendStateMetaInfo<?, ?> registeredBackendStateMetaInfo =
                        new RegisteredBackendStateMetaInfo<>(metaInfoSerializationProxy);
                    stateTable = newStateTable(registeredBackendStateMetaInfo);
                    stateTables.put(metaInfoSerializationProxy.getStateName(), stateTable);
                    kvStatesById.put(numRegisteredKvStates, metaInfoSerializationProxy.getStateName());
                    ++numRegisteredKvStates;
                }
            }
            for (Tuple2<Integer, Long> groupOffset : keyGroupsHandle.getGroupRangeOffsets()) {
                int keyGroupIndex = groupOffset.f0;
                long offset = groupOffset.f1;
                fsDataInputStream.seek(offset);
                int writtenKeyGroupIndex = inView.readInt();
                Preconditions.checkState(writtenKeyGroupIndex == keyGroupIndex, "Unexpected key-group in restore.");
                for (int i = 0; i < metaInfoList.size(); i++) {
                    int kvStateId = inView.readShort();
                    StateTable<K, ?, ?> stateTable = stateTables.get(kvStatesById.get(kvStateId));
                    StateTableByKeyGroupReader keyGroupReader =
                        StateTableByKeyGroupReaders.readerForVersion(stateTable, serializationProxy.getRestoredVersion());
                    keyGroupReader.readMappingsInKeyGroup(inView, keyGroupIndex);
                }
            }
        } finally {
            cancelStreamRegistry.unregisterClosable(fsDataInputStream);
            IOUtils.closeQuietly(fsDataInputStream);
        }
    }
}
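The pattern above opens the handle's stream once, registers it for cancellation, then seeks to each key-group offset before reading through a DataInputView. Below is a minimal, self-contained sketch of that seek-and-read pattern; the SeekAndReadSketch class, the file path, and the offsets are hypothetical placeholders, not part of the Flink sources.

// Minimal sketch (hypothetical class and arguments): seek an FSDataInputStream to
// recorded offsets and read each block through a DataInputViewStreamWrapper.
import org.apache.flink.core.fs.FSDataInputStream;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.core.memory.DataInputViewStreamWrapper;

public class SeekAndReadSketch {

    public static void readAtOffsets(Path file, long[] offsets) throws Exception {
        FileSystem fs = file.getFileSystem();
        try (FSDataInputStream in = fs.open(file)) {
            DataInputViewStreamWrapper view = new DataInputViewStreamWrapper(in);
            for (long offset : offsets) {
                // position the underlying stream, then read a header through the view
                in.seek(offset);
                int header = view.readInt();
                System.out.println("Block at offset " + offset + " starts with " + header);
            }
        }
    }
}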
Use of org.apache.flink.core.fs.FSDataInputStream in project flink by apache.
The class DistCp, method main.
public static void main(String[] args) throws Exception {
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    ParameterTool params = ParameterTool.fromArgs(args);
    if (!params.has("input") || !params.has("output")) {
        System.err.println("Usage: --input <path> --output <path> [--parallelism <n>]");
        return;
    }
    final Path sourcePath = new Path(params.get("input"));
    final Path targetPath = new Path(params.get("output"));
    if (!isLocal(env) && !(isOnDistributedFS(sourcePath) && isOnDistributedFS(targetPath))) {
        System.out.println("In a distributed mode only HDFS input/output paths are supported");
        return;
    }
    final int parallelism = params.getInt("parallelism", 10);
    if (parallelism <= 0) {
        System.err.println("Parallelism should be greater than 0");
        return;
    }
    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);
    env.setParallelism(parallelism);
    long startTime = System.currentTimeMillis();
    LOGGER.info("Initializing copy tasks");
    List<FileCopyTask> tasks = getCopyTasks(sourcePath);
    LOGGER.info("Copy task initialization took " + (System.currentTimeMillis() - startTime) + "ms");
    DataSet<FileCopyTask> inputTasks = new DataSource<>(
        env, new FileCopyTaskInputFormat(tasks), new GenericTypeInfo<>(FileCopyTask.class), "fileCopyTasks");
    FlatMapOperator<FileCopyTask, Object> res = inputTasks.flatMap(new RichFlatMapFunction<FileCopyTask, Object>() {

        private static final long serialVersionUID = 1109254230243989929L;

        private LongCounter fileCounter;
        private LongCounter bytesCounter;

        @Override
        public void open(Configuration parameters) throws Exception {
            bytesCounter = getRuntimeContext().getLongCounter(BYTES_COPIED_CNT_NAME);
            fileCounter = getRuntimeContext().getLongCounter(FILES_COPIED_CNT_NAME);
        }

        @Override
        public void flatMap(FileCopyTask task, Collector<Object> out) throws Exception {
            LOGGER.info("Processing task: " + task);
            Path outPath = new Path(targetPath, task.getRelativePath());
            FileSystem targetFs = targetPath.getFileSystem();
            // creating parent folders in case of a local FS
            if (!targetFs.isDistributedFS()) {
                // dealing with cases like file:///tmp or just /tmp
                File outFile = outPath.toUri().isAbsolute() ? new File(outPath.toUri()) : new File(outPath.toString());
                File parentFile = outFile.getParentFile();
                if (!parentFile.mkdirs() && !parentFile.exists()) {
                    throw new RuntimeException("Cannot create local file system directories: " + parentFile);
                }
            }
            FSDataOutputStream outputStream = null;
            FSDataInputStream inputStream = null;
            try {
                outputStream = targetFs.create(outPath, FileSystem.WriteMode.OVERWRITE);
                inputStream = task.getPath().getFileSystem().open(task.getPath());
                int bytes = IOUtils.copy(inputStream, outputStream);
                bytesCounter.add(bytes);
            } finally {
                IOUtils.closeQuietly(inputStream);
                IOUtils.closeQuietly(outputStream);
            }
            fileCounter.add(1L);
        }
    });
    // no data sinks are needed, therefore just printing an empty result
    res.print();
    Map<String, Object> accumulators = env.getLastJobExecutionResult().getAllAccumulatorResults();
    LOGGER.info("== COUNTERS ==");
    for (Map.Entry<String, Object> e : accumulators.entrySet()) {
        LOGGER.info(e.getKey() + ": " + e.getValue());
    }
}
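The copy step in DistCp pairs an FSDataInputStream from the source file system with an FSDataOutputStream on the target. The following is a minimal sketch of just that step, using try-with-resources and a plain byte buffer instead of the commons-io helpers; the CopySketch class and its arguments are hypothetical.

// Minimal sketch (hypothetical class): copy a single file between Flink file systems.
import org.apache.flink.core.fs.FSDataInputStream;
import org.apache.flink.core.fs.FSDataOutputStream;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class CopySketch {

    public static long copyFile(Path source, Path target) throws Exception {
        FileSystem sourceFs = source.getFileSystem();
        FileSystem targetFs = target.getFileSystem();
        long bytesCopied = 0L;
        try (FSDataInputStream in = sourceFs.open(source);
                FSDataOutputStream out = targetFs.create(target, FileSystem.WriteMode.OVERWRITE)) {
            byte[] buffer = new byte[4096];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
                bytesCopied += read;
            }
        }
        return bytesCopied;
    }
}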
Use of org.apache.flink.core.fs.FSDataInputStream in project flink by apache.
The class AbstractHadoopFileSystemITTest, method testSimpleFileWriteAndRead.
@Test
public void testSimpleFileWriteAndRead() throws Exception {
    final String testLine = "Hello Upload!";
    final Path path = new Path(basePath, "test.txt");
    try {
        try (FSDataOutputStream out = fs.create(path, FileSystem.WriteMode.OVERWRITE);
                OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
            writer.write(testLine);
        }
        // just in case, wait for the path to exist
        checkPathExistence(path, true, consistencyToleranceNS);
        try (FSDataInputStream in = fs.open(path);
                InputStreamReader ir = new InputStreamReader(in, StandardCharsets.UTF_8);
                BufferedReader reader = new BufferedReader(ir)) {
            String line = reader.readLine();
            assertEquals(testLine, line);
        }
    } finally {
        fs.delete(path, false);
    }
    checkPathExistence(path, false, consistencyToleranceNS);
}
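The test tolerates eventually consistent file systems by polling for the path through checkPathExistence, which is defined elsewhere in the test class. Below is a hypothetical sketch of what such a poll could look like, assuming only FileSystem.exists; the real helper may differ.

// Hypothetical sketch of a consistency-tolerant existence check.
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class ExistenceCheckSketch {

    static void waitForExistence(FileSystem fs, Path path, boolean expected, long toleranceNanos) throws Exception {
        long deadline = System.nanoTime() + toleranceNanos;
        // poll until the file system reports the expected state or the tolerance expires
        while (fs.exists(path) != expected && System.nanoTime() < deadline) {
            Thread.sleep(50L);
        }
        if (fs.exists(path) != expected) {
            throw new AssertionError("Path " + path + " did not reach existence=" + expected + " in time");
        }
    }
}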
Use of org.apache.flink.core.fs.FSDataInputStream in project flink by apache.
The class HadoopRecoverableWriterOldHadoopWithNoTruncateSupportTest, method verifyFileContent.
private static void verifyFileContent(final Path testPath, final String expectedContent) throws IOException {
    try (FSDataInputStream in = fileSystem.open(testPath);
            InputStreamReader ir = new InputStreamReader(in, UTF_8);
            BufferedReader reader = new BufferedReader(ir)) {
        final String line = reader.readLine();
        assertEquals(expectedContent, line);
    }
}
Use of org.apache.flink.core.fs.FSDataInputStream in project flink by apache.
The class HadoopViewFileSystemTruncateTest, method verifyFileContent.
private static void verifyFileContent(final org.apache.flink.core.fs.Path testPath, final String expectedContent) throws IOException {
    try (FSDataInputStream in = fSystem.open(testPath);
            InputStreamReader ir = new InputStreamReader(in, UTF_8);
            BufferedReader reader = new BufferedReader(ir)) {
        final String line = reader.readLine();
        assertEquals(expectedContent, line);
    }
}