
Example 1 with ImmutablePair

Use of org.apache.hudi.common.util.collection.ImmutablePair in project hudi by apache, in the class HoodieRepairTool, method doRepair.

/**
 * Does repair, either in REPAIR or DRY_RUN mode.
 *
 * @param startingInstantOption {@link Option} of starting instant for scanning, can be empty.
 * @param endingInstantOption   {@link Option} of ending instant for scanning, can be empty.
 * @param isDryRun              Is dry run.
 * @return {@code true} if the repair (or dry run) completes successfully; {@code false} otherwise.
 * @throws IOException upon errors.
 */
boolean doRepair(Option<String> startingInstantOption, Option<String> endingInstantOption, boolean isDryRun) throws IOException {
    // Scans all partitions to find base and log files in the base path
    List<Path> allFilesInPartitions = HoodieDataTableUtils.getBaseAndLogFilePathsFromFileSystem(tableMetadata, cfg.basePath);
    // Buckets the files based on instant time
    // instant time -> relative paths of base and log files to base path
    Map<String, List<String>> instantToFilesMap = RepairUtils.tagInstantsOfBaseAndLogFiles(metaClient.getBasePath(), allFilesInPartitions);
    List<String> instantTimesToRepair = instantToFilesMap.keySet().stream().filter(instant -> (!startingInstantOption.isPresent() || instant.compareTo(startingInstantOption.get()) >= 0) && (!endingInstantOption.isPresent() || instant.compareTo(endingInstantOption.get()) <= 0)).collect(Collectors.toList());
    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
    HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline();
    // This assumes that the archived timeline only has completed instants so this is safe
    archivedTimeline.loadCompletedInstantDetailsInMemory();
    List<ImmutablePair<String, List<String>>> instantFilesToRemove = context.parallelize(instantTimesToRepair).map(instantToRepair -> new ImmutablePair<>(instantToRepair, RepairUtils.findInstantFilesToRemove(instantToRepair, instantToFilesMap.get(instantToRepair), activeTimeline, archivedTimeline))).collectAsList();
    List<ImmutablePair<String, List<String>>> instantsWithDanglingFiles = instantFilesToRemove.stream().filter(e -> !e.getValue().isEmpty()).collect(Collectors.toList());
    printRepairInfo(instantTimesToRepair, instantsWithDanglingFiles);
    if (!isDryRun) {
        List<String> relativeFilePathsToDelete = instantsWithDanglingFiles.stream().flatMap(e -> e.getValue().stream()).collect(Collectors.toList());
        if (!relativeFilePathsToDelete.isEmpty()) {
            if (!backupFiles(relativeFilePathsToDelete)) {
                LOG.error("Error backing up dangling files. Exiting...");
                return false;
            }
            return deleteFiles(context, cfg.basePath, relativeFilePathsToDelete);
        }
        LOG.info(String.format("Table repair on %s is successful", cfg.basePath));
    }
    return true;
}
Also used : Path(org.apache.hadoop.fs.Path) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) HoodieArchivedTimeline(org.apache.hudi.common.table.timeline.HoodieArchivedTimeline) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) FileIOUtils(org.apache.hudi.common.util.FileIOUtils) Parameter(com.beust.jcommander.Parameter) JCommander(com.beust.jcommander.JCommander) FileSystem(org.apache.hadoop.fs.FileSystem) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Option(org.apache.hudi.common.util.Option) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) SecureRandom(java.security.SecureRandom) Logger(org.apache.log4j.Logger) LogManager(org.apache.log4j.LogManager) StringUtils(org.apache.hudi.common.util.StringUtils) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) TypedProperties(org.apache.hudi.common.config.TypedProperties) SerializableConfiguration(org.apache.hudi.common.config.SerializableConfiguration) HoodieTableMetadata(org.apache.hudi.metadata.HoodieTableMetadata) FileSystemBackedTableMetadata(org.apache.hudi.metadata.FileSystemBackedTableMetadata) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) RepairUtils(org.apache.hudi.table.repair.RepairUtils) FSUtils(org.apache.hudi.common.fs.FSUtils)
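
The core pattern above is pairing each instant with the dangling files found for it, then keeping only the non-empty pairs for reporting and deletion. A minimal sketch of that shape, assuming hudi-common is on the classpath and using hypothetical instant and file names:

import org.apache.hudi.common.util.collection.ImmutablePair;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class DanglingFileReportSketch {
    public static void main(String[] args) {
        // Hypothetical instant -> dangling files mapping; in doRepair this
        // comes from RepairUtils.findInstantFilesToRemove per instant.
        Map<String, List<String>> instantToDanglingFiles = new HashMap<>();
        instantToDanglingFiles.put("20220101010101", Arrays.asList("p1/f1.parquet"));
        instantToDanglingFiles.put("20220202020202", Collections.emptyList());

        // Pair each instant with its files and drop the empty pairs, mirroring
        // the filter on e.getValue().isEmpty() in doRepair.
        List<ImmutablePair<String, List<String>>> instantsWithDanglingFiles =
            instantToDanglingFiles.entrySet().stream()
                .map(e -> new ImmutablePair<>(e.getKey(), e.getValue()))
                .filter(p -> !p.getValue().isEmpty())
                .collect(Collectors.toList());

        instantsWithDanglingFiles.forEach(p ->
            System.out.println(p.getLeft() + " -> " + p.getRight()));
    }
}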

Example 2 with ImmutablePair

Use of org.apache.hudi.common.util.collection.ImmutablePair in project hudi by apache, in the class HoodieGlobalBloomIndex, method tagLocationBacktoRecords.

/**
 * Tagging for global index should only consider the record key.
 */
@Override
protected <R> HoodieData<HoodieRecord<R>> tagLocationBacktoRecords(HoodiePairData<HoodieKey, HoodieRecordLocation> keyLocationPairs, HoodieData<HoodieRecord<R>> records) {
    HoodiePairData<String, HoodieRecord<R>> incomingRowKeyRecordPairs = records.mapToPair(record -> new ImmutablePair<>(record.getRecordKey(), record));
    HoodiePairData<String, Pair<HoodieRecordLocation, HoodieKey>> existingRecordKeyToRecordLocationHoodieKeyMap = keyLocationPairs.mapToPair(p -> new ImmutablePair<>(p.getKey().getRecordKey(), new ImmutablePair<>(p.getValue(), p.getKey())));
    // The incoming records may contain keys that are absent from the index (their fileId would be null), so we do a left outer join.
    return incomingRowKeyRecordPairs.leftOuterJoin(existingRecordKeyToRecordLocationHoodieKeyMap).values().flatMap(record -> {
        final HoodieRecord<R> hoodieRecord = record.getLeft();
        final Option<Pair<HoodieRecordLocation, HoodieKey>> recordLocationHoodieKeyPair = record.getRight();
        if (recordLocationHoodieKeyPair.isPresent()) {
            // Record key matched to file
            if (config.getBloomIndexUpdatePartitionPath() && !recordLocationHoodieKeyPair.get().getRight().getPartitionPath().equals(hoodieRecord.getPartitionPath())) {
                // Create an empty record to delete the record in the old partition
                HoodieRecord<R> deleteRecord = new HoodieAvroRecord(recordLocationHoodieKeyPair.get().getRight(), new EmptyHoodieRecordPayload());
                deleteRecord.setCurrentLocation(recordLocationHoodieKeyPair.get().getLeft());
                deleteRecord.seal();
                // Tag the incoming record for inserting to the new partition
                HoodieRecord<R> insertRecord = HoodieIndexUtils.getTaggedRecord(hoodieRecord, Option.empty());
                return Arrays.asList(deleteRecord, insertRecord).iterator();
            } else {
                // Tag the record to its existing location; if updating the partition path is disabled, the record is still updated in its old partition even when the paths differ.
                return Collections.singletonList((HoodieRecord<R>) HoodieIndexUtils.getTaggedRecord(new HoodieAvroRecord(recordLocationHoodieKeyPair.get().getRight(), (HoodieRecordPayload) hoodieRecord.getData()), Option.ofNullable(recordLocationHoodieKeyPair.get().getLeft()))).iterator();
            }
        } else {
            return Collections.singletonList((HoodieRecord<R>) HoodieIndexUtils.getTaggedRecord(hoodieRecord, Option.empty())).iterator();
        }
    });
}
Also used : ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Pair(org.apache.hudi.common.util.collection.Pair) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) EmptyHoodieRecordPayload(org.apache.hudi.common.model.EmptyHoodieRecordPayload)
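
The join here hinges on nesting: the index side is keyed by record key, and its value is itself an ImmutablePair of (location, HoodieKey). A small sketch of that nesting, assuming hudi-common is on the classpath and with plain Strings standing in for HoodieRecordLocation and HoodieKey:

import org.apache.hudi.common.util.collection.ImmutablePair;
import org.apache.hudi.common.util.collection.Pair;

public class NestedPairSketch {
    public static void main(String[] args) {
        // Hypothetical stand-ins for HoodieRecordLocation and HoodieKey.
        String location = "20160401010101/file-abc";
        String hoodieKey = "000:2016/04/01";

        // Key the index entry by record key; the value nests (location, key),
        // mirroring existingRecordKeyToRecordLocationHoodieKeyMap above.
        Pair<String, Pair<String, String>> entry =
            new ImmutablePair<>("000", new ImmutablePair<>(location, hoodieKey));

        System.out.println("record key: " + entry.getLeft());
        System.out.println("location:   " + entry.getRight().getLeft());
        System.out.println("hoodie key: " + entry.getRight().getRight());
    }
}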

Example 3 with ImmutablePair

Use of org.apache.hudi.common.util.collection.ImmutablePair in project hudi by apache, in the class ListBasedHoodieBloomIndexHelper, method findMatchingFilesForRecordKeys.

@Override
public HoodiePairData<HoodieKey, HoodieRecordLocation> findMatchingFilesForRecordKeys(HoodieWriteConfig config, HoodieEngineContext context, HoodieTable hoodieTable, HoodiePairData<String, String> partitionRecordKeyPairs, HoodieData<Pair<String, HoodieKey>> fileComparisonPairs, Map<String, List<BloomIndexFileInfo>> partitionToFileInfo, Map<String, Long> recordsPerPartition) {
    List<Pair<String, HoodieKey>> fileComparisonPairList = HoodieList.getList(fileComparisonPairs).stream().sorted(Comparator.comparing(Pair::getLeft)).collect(toList());
    List<HoodieKeyLookupResult> keyLookupResults = new ArrayList<>();
    Iterator<List<HoodieKeyLookupResult>> iterator = new HoodieBaseBloomIndexCheckFunction(hoodieTable, config).apply(fileComparisonPairList.iterator());
    while (iterator.hasNext()) {
        keyLookupResults.addAll(iterator.next());
    }
    keyLookupResults = keyLookupResults.stream().filter(lr -> !lr.getMatchingRecordKeys().isEmpty()).collect(toList());
    return context.parallelize(keyLookupResults).flatMap(lookupResult -> lookupResult.getMatchingRecordKeys().stream().map(recordKey -> new ImmutablePair<>(lookupResult, recordKey)).iterator()).mapToPair(pair -> {
        HoodieKeyLookupResult lookupResult = pair.getLeft();
        String recordKey = pair.getRight();
        return new ImmutablePair<>(new HoodieKey(recordKey, lookupResult.getPartitionPath()), new HoodieRecordLocation(lookupResult.getBaseInstantTime(), lookupResult.getFileId()));
    });
}
Also used : ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Pair(org.apache.hudi.common.util.collection.Pair) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieData(org.apache.hudi.common.data.HoodieData) HoodiePairData(org.apache.hudi.common.data.HoodiePairData) HoodieList(org.apache.hudi.common.data.HoodieList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieKeyLookupResult(org.apache.hudi.io.HoodieKeyLookupResult) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) HoodieKey(org.apache.hudi.common.model.HoodieKey) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) Comparator(java.util.Comparator) Collectors.toList(java.util.stream.Collectors.toList)
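
Each lookup result that matched several record keys fans out into one (HoodieKey, HoodieRecordLocation) pair per key. A minimal sketch of that one-to-many expansion, assuming hudi-common is on the classpath and with plain Strings in place of the Hudi types:

import org.apache.hudi.common.util.collection.ImmutablePair;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class FanOutSketch {
    public static void main(String[] args) {
        // Hypothetical lookup result: one base file matched three record keys.
        String fileId = "file-1";
        List<String> matchingRecordKeys = Arrays.asList("k1", "k2", "k3");

        // Expand the single result into one (recordKey, fileId) pair per key,
        // the same shape as the flatMap/mapToPair chain above.
        List<ImmutablePair<String, String>> keyToFile = matchingRecordKeys.stream()
            .map(recordKey -> new ImmutablePair<>(recordKey, fileId))
            .collect(Collectors.toList());

        keyToFile.forEach(p -> System.out.println(p.getLeft() + " -> " + p.getRight()));
    }
}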

Example 4 with ImmutablePair

Use of org.apache.hudi.common.util.collection.ImmutablePair in project hudi by apache, in the class DFSTestSuitePathSelector, method getNextFilePathsAndMaxModificationTime.

@Override
public Pair<Option<String>, String> getNextFilePathsAndMaxModificationTime(Option<String> lastCheckpointStr, long sourceLimit) {
    Integer lastBatchId;
    Integer nextBatchId;
    try {
        if (lastCheckpointStr.isPresent()) {
            lastBatchId = Integer.parseInt(lastCheckpointStr.get());
            nextBatchId = lastBatchId + 1;
        } else {
            lastBatchId = 0;
            nextBatchId = 1;
        }
        // obtain all eligible files for the batch
        List<FileStatus> eligibleFiles = new ArrayList<>();
        FileStatus[] fileStatuses = fs.globStatus(new Path(props.getString(Config.ROOT_INPUT_PATH_PROP), "*"));
        // Say the input data is as follows: input/1, input/2, input/5, where batches 3 and 4 were rolled back and 5 is
        // newly generated data. The checkpoint from the latest commit metadata will be 2 since 3 and 4 were rolled
        // back, so the next batch id must be set to 5 instead of 3.
        Option<String> correctBatchIdDueToRollback = Option.fromJavaOptional(Arrays.stream(fileStatuses).map(f -> f.getPath().toString().split("/")[f.getPath().toString().split("/").length - 1]).filter(bid1 -> Integer.parseInt(bid1) > lastBatchId).min((bid1, bid2) -> Integer.compare(Integer.parseInt(bid1), Integer.parseInt(bid2))));
        if (correctBatchIdDueToRollback.isPresent() && Integer.parseInt(correctBatchIdDueToRollback.get()) > nextBatchId) {
            nextBatchId = Integer.parseInt(correctBatchIdDueToRollback.get());
        }
        log.info("Using DFSTestSuitePathSelector, checkpoint: " + lastCheckpointStr + " sourceLimit: " + sourceLimit + " lastBatchId: " + lastBatchId + " nextBatchId: " + nextBatchId);
        for (FileStatus fileStatus : fileStatuses) {
            if (!fileStatus.isDirectory() || IGNORE_FILEPREFIX_LIST.stream().anyMatch(pfx -> fileStatus.getPath().getName().startsWith(pfx))) {
                continue;
            } else if (Integer.parseInt(fileStatus.getPath().getName()) > lastBatchId && Integer.parseInt(fileStatus.getPath().getName()) <= nextBatchId) {
                RemoteIterator<LocatedFileStatus> files = fs.listFiles(fileStatus.getPath(), true);
                while (files.hasNext()) {
                    eligibleFiles.add(files.next());
                }
            }
        }
        // no data to read
        if (eligibleFiles.isEmpty()) {
            return new ImmutablePair<>(Option.empty(), lastCheckpointStr.orElseGet(() -> String.valueOf(Long.MIN_VALUE)));
        }
        // read the files out.
        String pathStr = eligibleFiles.stream().map(f -> f.getPath().toString()).collect(Collectors.joining(","));
        return new ImmutablePair<>(Option.ofNullable(pathStr), String.valueOf(nextBatchId));
    } catch (IOException ioe) {
        throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Pair(org.apache.hudi.common.util.collection.Pair) Arrays(java.util.Arrays) HoodieTestSuiteJob(org.apache.hudi.integ.testsuite.HoodieTestSuiteJob) Logger(org.slf4j.Logger) LoggerFactory(org.slf4j.LoggerFactory) TypedProperties(org.apache.hudi.common.config.TypedProperties) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) Configuration(org.apache.hadoop.conf.Configuration) Option(org.apache.hudi.common.util.Option) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) List(java.util.List) DFSPathSelector(org.apache.hudi.utilities.sources.helpers.DFSPathSelector)
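
Here ImmutablePair serves as a lightweight two-value return: the selected paths (possibly empty) plus the checkpoint to persist. A sketch of that contract, assuming hudi-common is on the classpath and using hypothetical paths and batch ids:

import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.ImmutablePair;
import org.apache.hudi.common.util.collection.Pair;

public class CheckpointReturnSketch {
    // Returns (selected paths, next checkpoint) in one value, mirroring the
    // shape of getNextFilePathsAndMaxModificationTime. Inputs are hypothetical.
    static Pair<Option<String>, String> select(boolean hasData, String lastCheckpoint) {
        if (!hasData) {
            // No eligible files: carry the previous checkpoint forward.
            return new ImmutablePair<>(Option.empty(), lastCheckpoint);
        }
        return new ImmutablePair<>(Option.ofNullable("input/5/part-0000"), "5");
    }

    public static void main(String[] args) {
        Pair<Option<String>, String> result = select(true, "2");
        System.out.println(result.getLeft() + " at checkpoint " + result.getRight());
    }
}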

Example 5 with ImmutablePair

Use of org.apache.hudi.common.util.collection.ImmutablePair in project hudi by apache, in the class TestHoodieBloomIndex, method testLoadInvolvedFiles.

@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testLoadInvolvedFiles(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws Exception {
    HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
    HoodieBloomIndex index = new HoodieBloomIndex(config, SparkHoodieBloomIndexHelper.getInstance());
    HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    HoodieSparkWriteableTestTable testTable = HoodieSparkWriteableTestTable.of(metaClient, SCHEMA, metadataWriter);
    // Create some partitions, and put some files
    // "2016/01/21": 0 file
    // "2016/04/01": 1 file (2_0_20160401010101.parquet)
    // "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet, 4_0_20150312101010.parquet)
    testTable.withPartitionMetaFiles("2016/01/21", "2016/04/01", "2015/03/12");
    RawTripTestPayload rowChange1 = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
    HoodieRecord record1 = new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1);
    RawTripTestPayload rowChange2 = new RawTripTestPayload("{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
    HoodieRecord record2 = new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2);
    RawTripTestPayload rowChange3 = new RawTripTestPayload("{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
    HoodieRecord record3 = new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3);
    RawTripTestPayload rowChange4 = new RawTripTestPayload("{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
    HoodieRecord record4 = new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
    List<String> partitions = Arrays.asList("2016/01/21", "2016/04/01", "2015/03/12");
    List<Pair<String, BloomIndexFileInfo>> filesList = index.loadColumnRangesFromFiles(partitions, context, hoodieTable);
    // Still 0, as no valid commit
    assertEquals(0, filesList.size());
    final String fileId1 = "1";
    final String fileId2 = "2";
    final String fileId3 = "3";
    final String fileId4 = "4";
    final Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap = new HashMap<>();
    String commitTime = "20160401010101";
    Path baseFilePath = testTable.forCommit(commitTime).withInserts(partitions.get(1), fileId2, Collections.emptyList());
    long baseFileLength = fs.getFileStatus(baseFilePath).getLen();
    partitionToFilesNameLengthMap.computeIfAbsent(partitions.get(1), k -> new ArrayList<>()).add(Pair.of(fileId2, Integer.valueOf((int) baseFileLength)));
    testTable.doWriteOperation(commitTime, WriteOperationType.UPSERT, Arrays.asList(partitions.get(1)), partitionToFilesNameLengthMap, false, false);
    commitTime = "20150312101010";
    partitionToFilesNameLengthMap.clear();
    testTable.forCommit(commitTime);
    baseFilePath = testTable.withInserts(partitions.get(2), fileId1, Collections.emptyList());
    baseFileLength = fs.getFileStatus(baseFilePath).getLen();
    partitionToFilesNameLengthMap.computeIfAbsent(partitions.get(2), k -> new ArrayList<>()).add(Pair.of(fileId1, Integer.valueOf((int) baseFileLength)));
    baseFilePath = testTable.withInserts(partitions.get(2), fileId3, Collections.singletonList(record1));
    baseFileLength = fs.getFileStatus(baseFilePath).getLen();
    partitionToFilesNameLengthMap.computeIfAbsent(partitions.get(2), k -> new ArrayList<>()).add(Pair.of(fileId3, Integer.valueOf((int) baseFileLength)));
    baseFilePath = testTable.withInserts(partitions.get(2), fileId4, Arrays.asList(record2, record3, record4));
    baseFileLength = fs.getFileStatus(baseFilePath).getLen();
    partitionToFilesNameLengthMap.computeIfAbsent(partitions.get(2), k -> new ArrayList<>()).add(Pair.of(fileId4, Integer.valueOf((int) baseFileLength)));
    testTable.doWriteOperation(commitTime, WriteOperationType.UPSERT, Arrays.asList(partitions.get(2)), partitionToFilesNameLengthMap, false, false);
    filesList = index.loadColumnRangesFromFiles(partitions, context, hoodieTable);
    assertEquals(4, filesList.size());
    if (rangePruning) {
        // these files will not have the key ranges
        assertNull(filesList.get(0).getRight().getMaxRecordKey());
        assertNull(filesList.get(0).getRight().getMinRecordKey());
        assertFalse(filesList.get(1).getRight().hasKeyRanges());
        assertNotNull(filesList.get(2).getRight().getMaxRecordKey());
        assertNotNull(filesList.get(2).getRight().getMinRecordKey());
        assertTrue(filesList.get(3).getRight().hasKeyRanges());
        // No longer sorted, but should contain the same files.
        List<ImmutablePair<String, BloomIndexFileInfo>> expected = Arrays.asList(new ImmutablePair<>("2016/04/01", new BloomIndexFileInfo("2")), new ImmutablePair<>("2015/03/12", new BloomIndexFileInfo("1")), new ImmutablePair<>("2015/03/12", new BloomIndexFileInfo("3", "000", "000")), new ImmutablePair<>("2015/03/12", new BloomIndexFileInfo("4", "001", "003")));
        assertEquals(expected, filesList);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImmutablePair(org.apache.hudi.common.util.collection.ImmutablePair) Pair(org.apache.hudi.common.util.collection.Pair) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieIndex(org.apache.hudi.index.HoodieIndex) HoodieIndexUtils(org.apache.hudi.index.HoodieIndexUtils) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) BloomFilter(org.apache.hudi.common.bloom.BloomFilter) BloomFilterFactory(org.apache.hudi.common.bloom.BloomFilterFactory) BloomFilterTypeCode(org.apache.hudi.common.bloom.BloomFilterTypeCode) RawTripTestPayload(org.apache.hudi.common.testutils.RawTripTestPayload) SchemaTestUtil.getSchemaFromResource(org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource) HoodieSparkWriteableTestTable(org.apache.hudi.testutils.HoodieSparkWriteableTestTable) TestHoodieMetadataBase(org.apache.hudi.client.functional.TestHoodieMetadataBase) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) HoodieJavaPairRDD(org.apache.hudi.data.HoodieJavaPairRDD) JavaRDD(org.apache.spark.api.java.JavaRDD) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) Tuple2(scala.Tuple2) Schema(org.apache.avro.Schema) Option(org.apache.hudi.common.util.Option) Arrays(java.util.Arrays) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) HashMap(java.util.HashMap) HashSet(java.util.HashSet) Collections(java.util.Collections) UUID(java.util.UUID) Paths(java.nio.file.Paths) Collectors(java.util.stream.Collectors) Stream(java.util.stream.Stream) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource) Arguments(org.junit.jupiter.params.provider.Arguments) BeforeEach(org.junit.jupiter.api.BeforeEach) AfterEach(org.junit.jupiter.api.AfterEach) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) Assertions.assertDoesNotThrow(org.junit.jupiter.api.Assertions.assertDoesNotThrow)
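
The final assertEquals in the test works because the pairs compare by value: two ImmutablePair instances are equal when their left and right elements are equal, so whole lists of pairs can be compared directly. A tiny sketch of that, assuming hudi-common is on the classpath:

import org.apache.hudi.common.util.collection.ImmutablePair;

public class PairEqualitySketch {
    public static void main(String[] args) {
        // Two separately constructed pairs with equal elements are equal,
        // which is what lets the test compare List<ImmutablePair<...>> values.
        ImmutablePair<String, String> a = new ImmutablePair<>("2015/03/12", "3");
        ImmutablePair<String, String> b = new ImmutablePair<>("2015/03/12", "3");
        System.out.println(a.equals(b)); // prints true
    }
}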

Aggregations

ImmutablePair (org.apache.hudi.common.util.collection.ImmutablePair): 14
ArrayList (java.util.ArrayList): 11
List (java.util.List): 10
Pair (org.apache.hudi.common.util.collection.Pair): 9
IOException (java.io.IOException): 8
Collectors (java.util.stream.Collectors): 8
Path (org.apache.hadoop.fs.Path): 8
Option (org.apache.hudi.common.util.Option): 8
Map (java.util.Map): 7
Arrays (java.util.Arrays): 5
HashMap (java.util.HashMap): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 5
TypedProperties (org.apache.hudi.common.config.TypedProperties): 5
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 5
Configuration (org.apache.hadoop.conf.Configuration): 4
FileStatus (org.apache.hadoop.fs.FileStatus): 4
HoodieEngineContext (org.apache.hudi.common.engine.HoodieEngineContext): 4
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 4
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 4
LogManager (org.apache.log4j.LogManager): 4