Use of org.apache.hudi.common.config.SerializableConfiguration in project Hudi by Apache.
The class WriterContext, method initContext.
public void initContext(JavaSparkContext jsc) throws HoodieException {
  try {
    this.schemaProvider = UtilHelpers.createSchemaProvider(cfg.schemaProviderClassName, props, jsc);
    String schemaStr = schemaProvider.getSourceSchema().toString();
    this.hoodieTestSuiteWriter = new HoodieTestSuiteWriter(jsc, props, cfg, schemaStr);
    int inputParallelism = cfg.inputParallelism > 0 ? cfg.inputParallelism : jsc.defaultParallelism();
    this.deltaGenerator = new DeltaGenerator(
        new DFSDeltaConfig(DeltaOutputMode.valueOf(cfg.outputTypeName), DeltaInputType.valueOf(cfg.inputFormatName),
            new SerializableConfiguration(jsc.hadoopConfiguration()), cfg.inputBasePath, cfg.targetBasePath,
            schemaStr, cfg.limitFileSize, inputParallelism, cfg.deleteOldInput),
        jsc, sparkSession, schemaStr, keyGenerator);
    log.info(String.format("Initialized writerContext with: %s", schemaStr));
  } catch (Exception e) {
    throw new HoodieException("Failed to reinitialize writerContext", e);
  }
}
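Hadoop's Configuration is not java.io.Serializable, which is why the snippet wraps it before passing it into DFSDeltaConfig, whose contents may cross a Spark serialization boundary. Below is a minimal sketch of the wrap/unwrap round trip, using only the constructor and get() accessor that appear in these snippets; the class name and the config key are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.config.SerializableConfiguration;

public class SerializableConfigurationSketch {
  public static void main(String[] args) {
    Configuration hadoopConf = new Configuration();
    hadoopConf.set("fs.defaultFS", "file:///"); // illustrative setting

    // Wrap the non-serializable Configuration so it can travel in a closure.
    SerializableConfiguration serConf = new SerializableConfiguration(hadoopConf);

    // On the deserialized side, get() recovers a usable Configuration.
    Configuration unwrapped = serConf.get();
    System.out.println(unwrapped.get("fs.defaultFS"));
  }
}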
Use of org.apache.hudi.common.config.SerializableConfiguration in project Hudi by Apache.
The class TestFSUtils, method testDeleteSubPathAsFile.
@Test
public void testDeleteSubPathAsFile() throws IOException {
  String rootDir = basePath + "/.hoodie/.temp";
  String subPathStr = rootDir + "/file3.txt";
  FileSystem fileSystem = metaClient.getFs();
  prepareTestDirectory(fileSystem, rootDir);
  assertTrue(FSUtils.deleteSubPath(subPathStr, new SerializableConfiguration(fileSystem.getConf()), false));
}
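The same helper can be exercised outside the test harness. A hypothetical standalone sketch, assuming FSUtils resides at org.apache.hudi.common.fs.FSUtils and that the trailing boolean is the recursive-delete flag (false here, as in the test, since the sub-path is a plain file); the temp path is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.fs.FSUtils;

public class DeleteSubPathSketch {
  public static void main(String[] args) throws Exception {
    Path file = new Path("/tmp/hudi-demo/.hoodie/.temp/file3.txt");
    FileSystem fs = file.getFileSystem(new Configuration());
    fs.create(file, true).close(); // stand-in for prepareTestDirectory

    boolean deleted = FSUtils.deleteSubPath(
        file.toString(), new SerializableConfiguration(fs.getConf()), false);
    System.out.println("deleted: " + deleted);
  }
}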
Use of org.apache.hudi.common.config.SerializableConfiguration in project Hudi by Apache.
The class StreamerUtil, method createWriteClient.
/**
 * Creates the Flink write client.
 *
 * <p>This is expected to be used by the client. Set the flag {@code loadFsViewStorageConfig}
 * to use the remote filesystem view storage config; otherwise, an in-memory filesystem view
 * storage is used.
 */
@SuppressWarnings("rawtypes")
public static HoodieFlinkWriteClient createWriteClient(Configuration conf, RuntimeContext runtimeContext, boolean loadFsViewStorageConfig) {
  HoodieFlinkEngineContext context = new HoodieFlinkEngineContext(
      new SerializableConfiguration(getHadoopConf()), new FlinkTaskContextSupplier(runtimeContext));
  HoodieWriteConfig writeConfig = getHoodieClientConfig(conf, loadFsViewStorageConfig);
  return new HoodieFlinkWriteClient<>(context, writeConfig);
}
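Since createWriteClient takes a RuntimeContext, a natural call site is a Flink RichFunction's open() hook. A hedged sketch under that assumption, also assuming StreamerUtil lives at org.apache.hudi.util.StreamerUtil and HoodieFlinkWriteClient at org.apache.hudi.client.HoodieFlinkWriteClient; the surrounding job wiring is omitted.

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.hudi.client.HoodieFlinkWriteClient;
import org.apache.hudi.util.StreamerUtil;

public class WriteClientOpenSketch extends RichMapFunction<String, String> {
  private final Configuration conf;
  private transient HoodieFlinkWriteClient<?> writeClient;

  public WriteClientOpenSketch(Configuration conf) {
    this.conf = conf;
  }

  @Override
  public void open(Configuration parameters) {
    // 'true' loads the remote filesystem view storage config, per the javadoc above.
    this.writeClient = StreamerUtil.createWriteClient(conf, getRuntimeContext(), true);
  }

  @Override
  public String map(String value) {
    return value; // real jobs would buffer/write records here
  }

  @Override
  public void close() {
    if (writeClient != null) {
      writeClient.close();
    }
  }
}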
Use of org.apache.hudi.common.config.SerializableConfiguration in project Hudi by Apache.
The class FlinkTables, method createTable.
/**
 * Creates the hoodie flink table.
 *
 * <p>This is expected to be used by the client.
 */
public static HoodieFlinkTable<?> createTable(Configuration conf, RuntimeContext runtimeContext) {
  HoodieFlinkEngineContext context = new HoodieFlinkEngineContext(
      new SerializableConfiguration(getHadoopConf()), new FlinkTaskContextSupplier(runtimeContext));
  HoodieWriteConfig writeConfig = getHoodieClientConfig(conf, true);
  return HoodieFlinkTable.create(writeConfig, context);
}
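Both Flink helpers above build the same HoodieFlinkEngineContext: a Hadoop Configuration wrapped in SerializableConfiguration, paired with a FlinkTaskContextSupplier. A sketch of that construction in isolation; the import paths are assumptions that may shift between Hudi versions, and the null RuntimeContext is only acceptable because no task-scoped method is invoked.

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.client.FlinkTaskContextSupplier;
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
import org.apache.hudi.common.config.SerializableConfiguration;

public class EngineContextSketch {
  public static void main(String[] args) {
    Configuration hadoopConf = new Configuration();
    // null RuntimeContext: fine for construction, would NPE on task-scoped calls.
    HoodieFlinkEngineContext context = new HoodieFlinkEngineContext(
        new SerializableConfiguration(hadoopConf),
        new FlinkTaskContextSupplier(null));
    // getHadoopConf() hands back the wrapper; get() unwraps it again.
    System.out.println(context.getHadoopConf().get().get("fs.defaultFS", "unset"));
  }
}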
Use of org.apache.hudi.common.config.SerializableConfiguration in project Hudi by Apache.
The class DirectWriteMarkers, method createdAndMergedDataPaths.
@Override
public Set<String> createdAndMergedDataPaths(HoodieEngineContext context, int parallelism) throws IOException {
  Set<String> dataFiles = new HashSet<>();
  FileStatus[] topLevelStatuses = fs.listStatus(markerDirPath);
  List<String> subDirectories = new ArrayList<>();
  for (FileStatus topLevelStatus : topLevelStatuses) {
    if (topLevelStatus.isFile()) {
      String pathStr = topLevelStatus.getPath().toString();
      if (pathStr.contains(HoodieTableMetaClient.MARKER_EXTN) && !pathStr.endsWith(IOType.APPEND.name())) {
        dataFiles.add(translateMarkerToDataPath(pathStr));
      }
    } else {
      subDirectories.add(topLevelStatus.getPath().toString());
    }
  }
  if (subDirectories.size() > 0) {
    parallelism = Math.min(subDirectories.size(), parallelism);
    SerializableConfiguration serializedConf = new SerializableConfiguration(fs.getConf());
    context.setJobStatus(this.getClass().getSimpleName(), "Obtaining marker files for all created, merged paths");
    dataFiles.addAll(context.flatMap(subDirectories, directory -> {
      Path path = new Path(directory);
      FileSystem fileSystem = path.getFileSystem(serializedConf.get());
      RemoteIterator<LocatedFileStatus> itr = fileSystem.listFiles(path, true);
      List<String> result = new ArrayList<>();
      while (itr.hasNext()) {
        FileStatus status = itr.next();
        String pathStr = status.getPath().toString();
        if (pathStr.contains(HoodieTableMetaClient.MARKER_EXTN) && !pathStr.endsWith(IOType.APPEND.name())) {
          result.add(translateMarkerToDataPath(pathStr));
        }
      }
      return result.stream();
    }, parallelism));
  }
  return dataFiles;
}
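The closure above shows the canonical driver-to-executor pattern: capture the SerializableConfiguration once outside the parallel function, then rebuild a FileSystem inside it via get(). A stripped-down Spark sketch of the same pattern; the directory list and local-mode setup are illustrative.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.spark.api.java.JavaSparkContext;

public class MarkerScanSketch {
  public static void main(String[] args) {
    JavaSparkContext jsc = new JavaSparkContext("local[2]", "marker-scan-sketch");
    // Capture once on the driver; the raw Configuration would not serialize.
    SerializableConfiguration serConf =
        new SerializableConfiguration(jsc.hadoopConfiguration());
    List<String> dirs = Arrays.asList("/tmp/markers/a", "/tmp/markers/b");

    List<Integer> counts = jsc.parallelize(dirs, dirs.size()).map(dir -> {
      Path path = new Path(dir);
      // Rebuild the FileSystem on the executor from the unwrapped config.
      FileSystem fs = path.getFileSystem(serConf.get());
      return fs.exists(path) ? fs.listStatus(path).length : 0;
    }).collect();

    System.out.println(counts);
    jsc.stop();
  }
}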