Search in sources :

Example 26 with HoodieFileGroupId

use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.

the class AbstractTableFileSystemView method getBaseFileOn.

@Override
public final Option<HoodieBaseFile> getBaseFileOn(String partitionStr, String instantTime, String fileId) {
    try {
        readLock.lock();
        String partitionPath = formatPartitionKey(partitionStr);
        ensurePartitionLoadedCorrectly(partitionPath);
        if (isFileGroupReplacedBeforeOrOn(new HoodieFileGroupId(partitionPath, fileId), instantTime)) {
            return Option.empty();
        } else {
            return fetchHoodieFileGroup(partitionPath, fileId).map(fileGroup -> fileGroup.getAllBaseFiles().filter(baseFile -> HoodieTimeline.compareTimestamps(baseFile.getCommitTime(), HoodieTimeline.EQUALS, instantTime)).filter(df -> !isBaseFileDueToPendingCompaction(df) && !isBaseFileDueToPendingClustering(df)).findFirst().orElse(null)).map(df -> addBootstrapBaseFileIfPresent(new HoodieFileGroupId(partitionPath, fileId), df));
        }
    } finally {
        readLock.unlock();
    }
}
Also used : BootstrapBaseFileMapping(org.apache.hudi.common.model.BootstrapBaseFileMapping) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) ReadLock(java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) WriteLock(java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock) Predicate(java.util.function.Predicate) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) Serializable(java.io.Serializable) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) AbstractMap(java.util.AbstractMap) List(java.util.List) GREATER_THAN_OR_EQUALS(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS) Stream(java.util.stream.Stream) ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieIOException(org.apache.hudi.exception.HoodieIOException) METADATA_BOOTSTRAP_INSTANT_TS(org.apache.hudi.common.table.timeline.HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS) LogManager(org.apache.log4j.LogManager) Comparator(java.util.Comparator) GREATER_THAN(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN) FSUtils(org.apache.hudi.common.fs.FSUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId)

Example 27 with HoodieFileGroupId

use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.

the class AbstractTableFileSystemView method isBaseFileDueToPendingCompaction.

/**
 * With async compaction, it is possible to see partial/complete base-files due to inflight-compactions, Ignore those
 * base-files.
 *
 * @param baseFile base File
 */
protected boolean isBaseFileDueToPendingCompaction(HoodieBaseFile baseFile) {
    final String partitionPath = getPartitionPathFromFilePath(baseFile.getPath());
    Option<Pair<String, CompactionOperation>> compactionWithInstantTime = getPendingCompactionOperationWithInstant(new HoodieFileGroupId(partitionPath, baseFile.getFileId()));
    return (compactionWithInstantTime.isPresent()) && (null != compactionWithInstantTime.get().getKey()) && baseFile.getCommitTime().equals(compactionWithInstantTime.get().getKey());
}
Also used : HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) Pair(org.apache.hudi.common.util.collection.Pair)

Example 28 with HoodieFileGroupId

use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.

the class ClusteringUtils method getAllFileGroupsInPendingClusteringPlans.

/**
 * Get filegroups to pending clustering instant mapping for all pending clustering plans.
 * This includes all clustering operations in 'requested' and 'inflight' states.
 */
public static Map<HoodieFileGroupId, HoodieInstant> getAllFileGroupsInPendingClusteringPlans(HoodieTableMetaClient metaClient) {
    Stream<Pair<HoodieInstant, HoodieClusteringPlan>> pendingClusteringPlans = getAllPendingClusteringPlans(metaClient);
    Stream<Map.Entry<HoodieFileGroupId, HoodieInstant>> resultStream = pendingClusteringPlans.flatMap(clusteringPlan -> getFileGroupEntriesInClusteringPlan(clusteringPlan.getLeft(), clusteringPlan.getRight()));
    Map<HoodieFileGroupId, HoodieInstant> resultMap;
    try {
        resultMap = resultStream.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    } catch (Exception e) {
        if (e instanceof IllegalStateException && e.getMessage().contains("Duplicate key")) {
            throw new HoodieException("Found duplicate file groups pending clustering. If you're running deltastreamer in continuous mode, consider adding delay using --min-sync-interval-seconds. " + "Or consider setting write concurrency mode to optimistic_concurrency_control.", e);
        }
        throw new HoodieException("Error getting all file groups in pending clustering", e);
    }
    LOG.info("Found " + resultMap.size() + " files in pending clustering operations");
    return resultMap;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieException(org.apache.hudi.exception.HoodieException) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) Pair(org.apache.hudi.common.util.collection.Pair)

Example 29 with HoodieFileGroupId

use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.

the class CompactionUtils method getAllPendingCompactionOperations.

/**
 * Get all PartitionPath + file-ids with pending Compaction operations and their target compaction instant time.
 *
 * @param metaClient Hoodie Table Meta Client
 */
public static Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> getAllPendingCompactionOperations(HoodieTableMetaClient metaClient) {
    List<Pair<HoodieInstant, HoodieCompactionPlan>> pendingCompactionPlanWithInstants = getAllPendingCompactionPlans(metaClient);
    Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> fgIdToPendingCompactionWithInstantMap = new HashMap<>();
    pendingCompactionPlanWithInstants.stream().flatMap(instantPlanPair -> getPendingCompactionOperations(instantPlanPair.getKey(), instantPlanPair.getValue())).forEach(pair -> {
        // on some DFSs.
        if (fgIdToPendingCompactionWithInstantMap.containsKey(pair.getKey())) {
            HoodieCompactionOperation operation = pair.getValue().getValue();
            HoodieCompactionOperation anotherOperation = fgIdToPendingCompactionWithInstantMap.get(pair.getKey()).getValue();
            if (!operation.equals(anotherOperation)) {
                String msg = "Hudi File Id (" + pair.getKey() + ") has more than 1 pending compactions. Instants: " + pair.getValue() + ", " + fgIdToPendingCompactionWithInstantMap.get(pair.getKey());
                throw new IllegalStateException(msg);
            }
        }
        fgIdToPendingCompactionWithInstantMap.put(pair.getKey(), pair.getValue());
    });
    return fgIdToPendingCompactionWithInstantMap;
}
Also used : CompactionPlanMigrator(org.apache.hudi.common.table.timeline.versioning.compaction.CompactionPlanMigrator) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieException(org.apache.hudi.exception.HoodieException) HashMap(java.util.HashMap) CompactionV1MigrationHandler(org.apache.hudi.common.table.timeline.versioning.compaction.CompactionV1MigrationHandler) Function(java.util.function.Function) Logger(org.apache.log4j.Logger) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) CompactionV2MigrationHandler(org.apache.hudi.common.table.timeline.versioning.compaction.CompactionV2MigrationHandler) Map(java.util.Map) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) List(java.util.List) Stream(java.util.stream.Stream) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) LogManager(org.apache.log4j.LogManager) Pair(org.apache.hudi.common.util.collection.Pair) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HashMap(java.util.HashMap) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) Pair(org.apache.hudi.common.util.collection.Pair)

Example 30 with HoodieFileGroupId

use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.

the class TestHoodieClientOnCopyOnWriteStorage method performClustering.

private HoodieWriteMetadata<JavaRDD<WriteStatus>> performClustering(HoodieClusteringConfig clusteringConfig, boolean populateMetaFields, boolean completeClustering, String validatorClasses, String sqlQueryForEqualityValidation, String sqlQueryForSingleResultValidation, Pair<List<HoodieRecord>, List<String>> allRecords) throws IOException {
    HoodiePreCommitValidatorConfig validatorConfig = HoodiePreCommitValidatorConfig.newBuilder().withPreCommitValidator(StringUtils.nullToEmpty(validatorClasses)).withPrecommitValidatorEqualitySqlQueries(sqlQueryForEqualityValidation).withPrecommitValidatorSingleResultSqlQueries(sqlQueryForSingleResultValidation).build();
    HoodieWriteConfig config = getConfigBuilder().withAutoCommit(false).withPreCommitValidatorConfig(validatorConfig).withProps(populateMetaFields ? new Properties() : getPropertiesForKeyGen()).withClusteringConfig(clusteringConfig).build();
    // create client with new config.
    SparkRDDWriteClient client = getHoodieWriteClient(config);
    String clusteringCommitTime = client.scheduleClustering(Option.empty()).get().toString();
    HoodieWriteMetadata<JavaRDD<WriteStatus>> clusterMetadata = client.cluster(clusteringCommitTime, completeClustering);
    if (config.isPreserveHoodieCommitMetadataForClustering() && config.populateMetaFields()) {
        verifyRecordsWrittenWithPreservedMetadata(new HashSet<>(allRecords.getRight()), allRecords.getLeft(), clusterMetadata.getWriteStatuses().collect());
    } else {
        verifyRecordsWritten(clusteringCommitTime, populateMetaFields, allRecords.getLeft(), clusterMetadata.getWriteStatuses().collect(), config);
    }
    Set<HoodieFileGroupId> replacedFileIds = new HashSet<>();
    clusterMetadata.getPartitionToReplaceFileIds().entrySet().forEach(partitionFiles -> partitionFiles.getValue().stream().forEach(file -> replacedFileIds.add(new HoodieFileGroupId(partitionFiles.getKey(), file))));
    return clusterMetadata;
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) FileIOUtils(org.apache.hudi.common.util.FileIOUtils) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) SparkSingleFileSortPlanStrategy(org.apache.hudi.client.clustering.plan.strategy.SparkSingleFileSortPlanStrategy) SparkTaskContextSupplier(org.apache.hudi.client.SparkTaskContextSupplier) HoodieWriteHelper(org.apache.hudi.table.action.commit.HoodieWriteHelper) BaseKeyGenerator(org.apache.hudi.keygen.BaseKeyGenerator) Future(java.util.concurrent.Future) Map(java.util.Map) EAGER(org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy.EAGER) Tag(org.junit.jupiter.api.Tag) HoodieWriteResult(org.apache.hudi.client.HoodieWriteResult) REQUESTED(org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) BaseHoodieWriteClient(org.apache.hudi.client.BaseHoodieWriteClient) IndexType(org.apache.hudi.index.HoodieIndex.IndexType) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) Set(java.util.Set) VERSION_0(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.VERSION_0) Arguments(org.junit.jupiter.params.provider.Arguments) HoodieIndex(org.apache.hudi.index.HoodieIndex) Executors(java.util.concurrent.Executors) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) Stream(java.util.stream.Stream) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieClientTestUtils(org.apache.hudi.testutils.HoodieClientTestUtils) SqlQuerySingleResultPreCommitValidator(org.apache.hudi.client.validator.SqlQuerySingleResultPreCommitValidator) DEFAULT_THIRD_PARTITION_PATH(org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH) Mockito.mock(org.mockito.Mockito.mock) HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) Assertions.fail(org.junit.jupiter.api.Assertions.fail) Dataset(org.apache.spark.sql.Dataset) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) Option(org.apache.hudi.common.util.Option) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) DEFAULT_FIRST_PARTITION_PATH(org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH) HoodieValidationException(org.apache.hudi.exception.HoodieValidationException) ArrayList(java.util.ArrayList) MarkerType(org.apache.hudi.common.table.marker.MarkerType) StringUtils(org.apache.hudi.common.util.StringUtils) KeyGenerator(org.apache.hudi.keygen.KeyGenerator) BulkInsertPartitioner(org.apache.hudi.table.BulkInsertPartitioner) Transformations.recordsToRecordKeySet(org.apache.hudi.common.testutils.Transformations.recordsToRecordKeySet) EXECUTION_STRATEGY_CLASS_NAME(org.apache.hudi.config.HoodieClusteringConfig.EXECUTION_STRATEGY_CLASS_NAME) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) JavaRDD(org.apache.spark.api.java.JavaRDD) TimelineLayoutVersion(org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion) ValueSource(org.junit.jupiter.params.provider.ValueSource) ConsistencyGuardConfig(org.apache.hudi.common.fs.ConsistencyGuardConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) HoodieData(org.apache.hudi.common.data.HoodieData) RDDCustomColumnsSortPartitioner(org.apache.hudi.execution.bulkinsert.RDDCustomColumnsSortPartitioner) Properties(java.util.Properties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) BaseFileOnlyView(org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView) SqlQueryEqualityPreCommitValidator(org.apache.hudi.client.validator.SqlQueryEqualityPreCommitValidator) DEFAULT_SECOND_PARTITION_PATH(org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) Row(org.apache.spark.sql.Row) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieCorruptedDataException(org.apache.hudi.exception.HoodieCorruptedDataException) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieSparkWriteableTestTable(org.apache.hudi.testutils.HoodieSparkWriteableTestTable) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) COMPLETED(org.apache.hudi.common.table.timeline.HoodieInstant.State.COMPLETED) REPLACE_COMMIT_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.REPLACE_COMMIT_ACTION) HoodieFailedWritesCleaningPolicy(org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) Logger(org.apache.log4j.Logger) HoodieMergeHandle(org.apache.hudi.io.HoodieMergeHandle) CLEAN_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.CLEAN_ACTION) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieStorageConfig(org.apache.hudi.config.HoodieStorageConfig) Path(org.apache.hadoop.fs.Path) HoodieSparkKeyGeneratorFactory(org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory) MethodSource(org.junit.jupiter.params.provider.MethodSource) HoodieRollbackException(org.apache.hudi.exception.HoodieRollbackException) SparkSingleFileSortExecutionStrategy(org.apache.hudi.client.clustering.run.strategy.SparkSingleFileSortExecutionStrategy) HoodiePreCommitValidatorConfig(org.apache.hudi.config.HoodiePreCommitValidatorConfig) TRIP_EXAMPLE_SCHEMA(org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) IOType(org.apache.hudi.common.model.IOType) RawTripTestPayload(org.apache.hudi.common.testutils.RawTripTestPayload) Collection(java.util.Collection) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) MarkerUtils(org.apache.hudi.common.util.MarkerUtils) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) List(java.util.List) FileCreateUtils.getBaseFileCountsForPaths(org.apache.hudi.common.testutils.FileCreateUtils.getBaseFileCountsForPaths) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) ROLLBACK_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.ROLLBACK_ACTION) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) NotNull(org.jetbrains.annotations.NotNull) HoodieInsertException(org.apache.hudi.exception.HoodieInsertException) Transformations.randomSelectAsHoodieKeys(org.apache.hudi.common.testutils.Transformations.randomSelectAsHoodieKeys) INFLIGHT(org.apache.hudi.common.table.timeline.HoodieInstant.State.INFLIGHT) COMMIT_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION) BaseFileUtils(org.apache.hudi.common.util.BaseFileUtils) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) EnumSource(org.junit.jupiter.params.provider.EnumSource) HashMap(java.util.HashMap) HashSet(java.util.HashSet) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieSparkCopyOnWriteTable(org.apache.hudi.table.HoodieSparkCopyOnWriteTable) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) ExecutorService(java.util.concurrent.ExecutorService) GenericRecord(org.apache.avro.generic.GenericRecord) ASYNC_CLUSTERING_ENABLE(org.apache.hudi.config.HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE) TypedProperties(org.apache.hudi.common.config.TypedProperties) NULL_SCHEMA(org.apache.hudi.common.testutils.HoodieTestDataGenerator.NULL_SCHEMA) Mockito.when(org.mockito.Mockito.when) FileCreateUtils(org.apache.hudi.common.testutils.FileCreateUtils) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) ClusteringTestUtils(org.apache.hudi.common.testutils.ClusteringTestUtils) SparkPreCommitValidator(org.apache.hudi.client.validator.SparkPreCommitValidator) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) HoodieClusteringConfig(org.apache.hudi.config.HoodieClusteringConfig) LogManager(org.apache.log4j.LogManager) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodiePreCommitValidatorConfig(org.apache.hudi.config.HoodiePreCommitValidatorConfig) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Properties(java.util.Properties) TypedProperties(org.apache.hudi.common.config.TypedProperties) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) HashSet(java.util.HashSet)

Aggregations

HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId)35 Pair (org.apache.hudi.common.util.collection.Pair)24 IOException (java.io.IOException)23 List (java.util.List)20 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)20 Collectors (java.util.stream.Collectors)19 Map (java.util.Map)18 ArrayList (java.util.ArrayList)17 Option (org.apache.hudi.common.util.Option)17 LogManager (org.apache.log4j.LogManager)17 Logger (org.apache.log4j.Logger)17 FileSlice (org.apache.hudi.common.model.FileSlice)16 HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile)16 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)16 Set (java.util.Set)15 Path (org.apache.hadoop.fs.Path)15 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)15 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)14 Arrays (java.util.Arrays)12 FSUtils (org.apache.hudi.common.fs.FSUtils)12