
Example 11 with SerializableConfiguration

Use of org.apache.hudi.common.config.SerializableConfiguration in project hudi by apache.

The class WriterContext, method initContext.

public void initContext(JavaSparkContext jsc) throws HoodieException {
    try {
        this.schemaProvider = UtilHelpers.createSchemaProvider(cfg.schemaProviderClassName, props, jsc);
        String schemaStr = schemaProvider.getSourceSchema().toString();
        this.hoodieTestSuiteWriter = new HoodieTestSuiteWriter(jsc, props, cfg, schemaStr);
        int inputParallelism = cfg.inputParallelism > 0 ? cfg.inputParallelism : jsc.defaultParallelism();
        this.deltaGenerator = new DeltaGenerator(
                new DFSDeltaConfig(
                        DeltaOutputMode.valueOf(cfg.outputTypeName),
                        DeltaInputType.valueOf(cfg.inputFormatName),
                        new SerializableConfiguration(jsc.hadoopConfiguration()),
                        cfg.inputBasePath, cfg.targetBasePath, schemaStr,
                        cfg.limitFileSize, inputParallelism, cfg.deleteOldInput),
                jsc, sparkSession, schemaStr, keyGenerator);
        log.info(String.format("Initialized writerContext with: %s", schemaStr));
    } catch (Exception e) {
        throw new HoodieException("Failed to reinitialize writerContext", e);
    }
}
Also used : HoodieTestSuiteWriter(org.apache.hudi.integ.testsuite.HoodieTestSuiteWriter) DeltaGenerator(org.apache.hudi.integ.testsuite.generator.DeltaGenerator) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieException(org.apache.hudi.exception.HoodieException) DFSDeltaConfig(org.apache.hudi.integ.testsuite.configuration.DFSDeltaConfig)
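
Why the configuration is wrapped before being handed to DFSDeltaConfig: Hadoop's Configuration does not implement java.io.Serializable, so it cannot travel inside a Spark closure as-is. Below is a minimal, self-contained sketch of that pattern; the demo class, app name, and printed property are hypothetical and not part of Hudi.

import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.spark.api.java.JavaSparkContext;

import java.util.Arrays;

public class SerializableConfDemo {
    public static void main(String[] args) {
        JavaSparkContext jsc = new JavaSparkContext("local[2]", "serializable-conf-demo");
        // Wrapped once on the driver; the wrapper serializes the underlying config.
        SerializableConfiguration serConf = new SerializableConfiguration(jsc.hadoopConfiguration());
        long count = jsc.parallelize(Arrays.asList("a", "b"))
                // Unwrapped inside each executor-side task via get().
                .map(s -> s + ":" + serConf.get().get("fs.defaultFS", "file:///"))
                .count();
        System.out.println(count);
        jsc.stop();
    }
}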

Example 12 with SerializableConfiguration

Use of org.apache.hudi.common.config.SerializableConfiguration in project hudi by apache.

The class TestFSUtils, method testDeleteSubPathAsFile.

@Test
public void testDeleteSubPathAsFile() throws IOException {
    String rootDir = basePath + "/.hoodie/.temp";
    String subPathStr = rootDir + "/file3.txt";
    FileSystem fileSystem = metaClient.getFs();
    prepareTestDirectory(fileSystem, rootDir);
    assertTrue(FSUtils.deleteSubPath(subPathStr, new SerializableConfiguration(fileSystem.getConf()), false));
}
Also used : SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.junit.jupiter.api.Test)
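
The guarantee the test relies on is that the wrapped FileSystem configuration survives Java serialization. Below is a minimal round-trip sketch using only the wrapper's public constructor and get() accessor; the class and property names are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.config.SerializableConfiguration;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

public class SerializableConfRoundTrip {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(false);
        conf.set("demo.key", "demo-value"); // hypothetical property

        SerializableConfiguration wrapped = new SerializableConfiguration(conf);

        // Round-trip through Java serialization, as an engine would do when
        // shipping a task to another JVM.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
            out.writeObject(wrapped);
        }
        SerializableConfiguration restored;
        try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
            restored = (SerializableConfiguration) in.readObject();
        }
        System.out.println(restored.get().get("demo.key")); // expected: demo-value
    }
}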

Example 13 with SerializableConfiguration

Use of org.apache.hudi.common.config.SerializableConfiguration in project hudi by apache.

The class StreamerUtil, method createWriteClient.

/**
 * Creates the Flink write client.
 *
 * <p>This is expected to be used by the client. Set the flag {@code loadFsViewStorageConfig} to load
 * the remote filesystem view storage config; otherwise an in-memory filesystem view storage is used.
 */
@SuppressWarnings("rawtypes")
public static HoodieFlinkWriteClient createWriteClient(Configuration conf, RuntimeContext runtimeContext, boolean loadFsViewStorageConfig) {
    HoodieFlinkEngineContext context = new HoodieFlinkEngineContext(new SerializableConfiguration(getHadoopConf()), new FlinkTaskContextSupplier(runtimeContext));
    HoodieWriteConfig writeConfig = getHoodieClientConfig(conf, loadFsViewStorageConfig);
    return new HoodieFlinkWriteClient<>(context, writeConfig);
}
Also used : SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieFlinkWriteClient(org.apache.hudi.client.HoodieFlinkWriteClient) HoodieFlinkEngineContext(org.apache.hudi.client.common.HoodieFlinkEngineContext) FlinkTaskContextSupplier(org.apache.hudi.client.FlinkTaskContextSupplier)
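
A hypothetical call site for the factory above: a Flink RichFunction that builds the write client once per task in open(), using the task's RuntimeContext. The operator class is illustrative only, and the import path for StreamerUtil is assumed from the Hudi Flink module layout.

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.hudi.client.HoodieFlinkWriteClient;
import org.apache.hudi.util.StreamerUtil;

@SuppressWarnings("rawtypes")
public class HudiWriteFunction extends RichMapFunction<String, String> {
    private final Configuration conf;
    private transient HoodieFlinkWriteClient writeClient;

    public HudiWriteFunction(Configuration conf) {
        this.conf = conf;
    }

    @Override
    public void open(Configuration parameters) {
        // true: load the remote filesystem view storage config;
        // false would fall back to an in-memory filesystem view.
        this.writeClient = StreamerUtil.createWriteClient(conf, getRuntimeContext(), true);
    }

    @Override
    public String map(String value) {
        return value; // a real operator would buffer and write records here
    }

    @Override
    public void close() {
        if (writeClient != null) {
            writeClient.close();
        }
    }
}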

Example 14 with SerializableConfiguration

Use of org.apache.hudi.common.config.SerializableConfiguration in project hudi by apache.

The class FlinkTables, method createTable.

/**
 * Creates the Hoodie Flink table.
 *
 * <p>This is expected to be used by the client.
 */
public static HoodieFlinkTable<?> createTable(Configuration conf, RuntimeContext runtimeContext) {
    HoodieFlinkEngineContext context = new HoodieFlinkEngineContext(new SerializableConfiguration(getHadoopConf()), new FlinkTaskContextSupplier(runtimeContext));
    HoodieWriteConfig writeConfig = getHoodieClientConfig(conf, true);
    return HoodieFlinkTable.create(writeConfig, context);
}
Also used : SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieFlinkEngineContext(org.apache.hudi.client.common.HoodieFlinkEngineContext) FlinkTaskContextSupplier(org.apache.hudi.client.FlinkTaskContextSupplier)
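
Unlike createWriteClient above, createTable fixes loadFsViewStorageConfig to true, so the table always resolves the remote filesystem view storage config. A minimal sketch of the call site follows; the helper class is hypothetical, and the import paths for FlinkTables and HoodieFlinkTable are assumed from the Hudi Flink module layout.

import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.configuration.Configuration;
import org.apache.hudi.table.HoodieFlinkTable;
import org.apache.hudi.util.FlinkTables;

public class TableFactoryDemo {
    // In practice this runs inside a RichFunction's open(), as in the
    // Example 13 sketch, with the operator's own RuntimeContext.
    public static HoodieFlinkTable<?> newTable(Configuration conf, RuntimeContext ctx) {
        return FlinkTables.createTable(conf, ctx);
    }
}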

Example 15 with SerializableConfiguration

Use of org.apache.hudi.common.config.SerializableConfiguration in project hudi by apache.

The class DirectWriteMarkers, method createdAndMergedDataPaths.

@Override
public Set<String> createdAndMergedDataPaths(HoodieEngineContext context, int parallelism) throws IOException {
    Set<String> dataFiles = new HashSet<>();
    FileStatus[] topLevelStatuses = fs.listStatus(markerDirPath);
    List<String> subDirectories = new ArrayList<>();
    for (FileStatus topLevelStatus : topLevelStatuses) {
        if (topLevelStatus.isFile()) {
            String pathStr = topLevelStatus.getPath().toString();
            if (pathStr.contains(HoodieTableMetaClient.MARKER_EXTN) && !pathStr.endsWith(IOType.APPEND.name())) {
                dataFiles.add(translateMarkerToDataPath(pathStr));
            }
        } else {
            subDirectories.add(topLevelStatus.getPath().toString());
        }
    }
    if (subDirectories.size() > 0) {
        parallelism = Math.min(subDirectories.size(), parallelism);
        SerializableConfiguration serializedConf = new SerializableConfiguration(fs.getConf());
        context.setJobStatus(this.getClass().getSimpleName(), "Obtaining marker files for all created, merged paths");
        dataFiles.addAll(context.flatMap(subDirectories, directory -> {
            Path path = new Path(directory);
            FileSystem fileSystem = path.getFileSystem(serializedConf.get());
            RemoteIterator<LocatedFileStatus> itr = fileSystem.listFiles(path, true);
            List<String> result = new ArrayList<>();
            while (itr.hasNext()) {
                FileStatus status = itr.next();
                String pathStr = status.getPath().toString();
                if (pathStr.contains(HoodieTableMetaClient.MARKER_EXTN) && !pathStr.endsWith(IOType.APPEND.name())) {
                    result.add(translateMarkerToDataPath(pathStr));
                }
            }
            return result.stream();
        }, parallelism));
    }
    return dataFiles;
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) IOType(org.apache.hudi.common.model.IOType) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieException(org.apache.hudi.exception.HoodieException) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) Set(java.util.Set) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) MarkerUtils(org.apache.hudi.common.util.MarkerUtils) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) List(java.util.List) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) Path(org.apache.hadoop.fs.Path) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) FSUtils(org.apache.hudi.common.fs.FSUtils)
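
The pattern to note in createdAndMergedDataPaths: the SerializableConfiguration is captured once on the driver, and each distributed task rebuilds its own FileSystem from it, since neither FileSystem nor a raw Configuration can be shipped in a closure. Below is a sequential distillation of that pattern; the class and method names are hypothetical, and context.flatMap above does the same work in parallel across the engine's workers.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.config.SerializableConfiguration;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class MarkerScanSketch {
    public static List<String> listChildren(Configuration conf, List<String> dirs) {
        // Captured once; safe to reference inside a (potentially remote) lambda.
        SerializableConfiguration serConf = new SerializableConfiguration(conf);
        return dirs.stream().flatMap(dir -> {
            try {
                Path path = new Path(dir);
                // Rebuild the FileSystem where the task runs, from the
                // deserialized configuration.
                FileSystem fs = path.getFileSystem(serConf.get());
                return Arrays.stream(fs.listStatus(path))
                        .map(status -> status.getPath().toString());
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }).collect(Collectors.toList());
    }
}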

Aggregations

SerializableConfiguration (org.apache.hudi.common.config.SerializableConfiguration): 32
Path (org.apache.hadoop.fs.Path): 20
FileSystem (org.apache.hadoop.fs.FileSystem): 16
FileStatus (org.apache.hadoop.fs.FileStatus): 15
List (java.util.List): 14
IOException (java.io.IOException): 13
Collectors (java.util.stream.Collectors): 13
Map (java.util.Map): 12
Test (org.junit.jupiter.api.Test): 12
ArrayList (java.util.ArrayList): 11
LogManager (org.apache.log4j.LogManager): 10
Logger (org.apache.log4j.Logger): 10
HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext): 9
Option (org.apache.hudi.common.util.Option): 9
Arrays (java.util.Arrays): 8
HoodieSparkEngineContext (org.apache.hudi.client.common.HoodieSparkEngineContext): 8
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 8
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 7
Collections (java.util.Collections): 6
Configuration (org.apache.hadoop.conf.Configuration): 6