Search in sources :

Example 6 with BootstrapIndex

Use of org.apache.hudi.common.bootstrap.index.BootstrapIndex in the Apache Hudi project.

From the class TestBootstrapIndex, method validateBootstrapIndex.

/**
 * Verifies that the bootstrap index read back through an {@link HFileBootstrapIndex} created on
 * {@code metaClient} agrees with the supplied expected mappings.
 *
 * <p>The checks run in this order:
 * <ol>
 *   <li>the number of indexed partitions equals the number of expected partitions, and every
 *       indexed partition is a member of {@code PARTITION_SET};</li>
 *   <li>the number of indexed file groups equals the total number of expected mappings, and every
 *       file group's partition path is a member of {@code PARTITION_SET};</li>
 *   <li>for each expected partition, the sorted mappings returned by the reader equal the sorted
 *       expected mappings, and a lookup by file group id returns a matching entry for each
 *       expected mapping.</li>
 * </ol>
 *
 * @param bootstrapMapping expected bootstrap file mappings, keyed by partition path
 */
private void validateBootstrapIndex(Map<String, List<BootstrapFileMapping>> bootstrapMapping) {
    BootstrapIndex bootstrapIndex = new HFileBootstrapIndex(metaClient);
    try (BootstrapIndex.IndexReader indexReader = bootstrapIndex.createReader()) {
        // Partition-level checks.
        List<String> partitionsInIndex = indexReader.getIndexedPartitionPaths();
        assertEquals(bootstrapMapping.size(), partitionsInIndex.size());
        for (String indexedPartition : partitionsInIndex) {
            assertTrue(PARTITION_SET.contains(indexedPartition));
        }

        // File-group-level checks: one file group is expected per mapping.
        long expectedFileGroupCount = bootstrapMapping.values().stream().mapToLong(List::size).sum();
        List<HoodieFileGroupId> indexedFileGroupIds = indexReader.getIndexedFileGroupIds();
        long actualFileGroupCount = indexedFileGroupIds.size();
        assertEquals(expectedFileGroupCount, actualFileGroupCount);
        for (HoodieFileGroupId indexedFileGroupId : indexedFileGroupIds) {
            assertTrue(PARTITION_SET.contains(indexedFileGroupId.getPartitionPath()));
        }

        // Per-partition checks.
        for (Map.Entry<String, List<BootstrapFileMapping>> partitionEntry : bootstrapMapping.entrySet()) {
            String partitionPath = partitionEntry.getKey();
            List<BootstrapFileMapping> expectedMappings = partitionEntry.getValue();

            // Sort both sides so the comparison does not depend on the order the reader returns.
            List<BootstrapFileMapping> actualSorted = indexReader.getSourceFileMappingForPartition(partitionPath);
            List<BootstrapFileMapping> expectedSorted = new ArrayList<>(expectedMappings);
            Collections.sort(actualSorted);
            Collections.sort(expectedSorted);
            assertEquals(expectedSorted, actualSorted, "Check for bootstrap index entries for partition " + partitionPath);

            // Look the same mappings up by file group id.
            List<HoodieFileGroupId> requestedIds = new ArrayList<>();
            for (BootstrapFileMapping mapping : expectedMappings) {
                requestedIds.add(mapping.getFileGroupId());
            }
            Map<HoodieFileGroupId, BootstrapFileMapping> mappingsById =
                indexReader.getSourceFileMappingForFileIds(requestedIds);
            assertEquals(requestedIds.size(), mappingsById.size());

            for (BootstrapFileMapping expectedMapping : expectedMappings) {
                BootstrapFileMapping actualMapping = mappingsById.get(expectedMapping.getFileGroupId());
                assertNotNull(actualMapping);
                assertEquals(expectedMapping.getFileId(), actualMapping.getFileId());
                assertEquals(expectedMapping.getPartitionPath(), actualMapping.getPartitionPath());
                assertEquals(BOOTSTRAP_BASE_PATH, actualMapping.getBootstrapBasePath());
                assertEquals(expectedMapping.getBootstrapFileStatus(), actualMapping.getBootstrapFileStatus());
                assertEquals(expectedMapping.getBootstrapPartitionPath(), actualMapping.getBootstrapPartitionPath());
            }
        }
    }
}
Also used : HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ArrayList(java.util.ArrayList) NoOpBootstrapIndex(org.apache.hudi.common.bootstrap.index.NoOpBootstrapIndex) HFileBootstrapIndex(org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) HFileBootstrapIndex(org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping)

Aggregations

BootstrapIndex (org.apache.hudi.common.bootstrap.index.BootstrapIndex): 6; HFileBootstrapIndex (org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex): 3; NoOpBootstrapIndex (org.apache.hudi.common.bootstrap.index.NoOpBootstrapIndex): 3; ArrayList (java.util.ArrayList): 2; Schema (org.apache.avro.Schema): 2; FullRecordBootstrapModeSelector (org.apache.hudi.client.bootstrap.selector.FullRecordBootstrapModeSelector): 2; MetadataOnlyBootstrapModeSelector (org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector): 2; HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 2; HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 2; NonpartitionedKeyGenerator (org.apache.hudi.keygen.NonpartitionedKeyGenerator): 2; SimpleKeyGenerator (org.apache.hudi.keygen.SimpleKeyGenerator): 2; List (java.util.List): 1; Properties (java.util.Properties): 1; Path (org.apache.hadoop.fs.Path): 1; HoodiePath (org.apache.hudi.avro.model.HoodiePath): 1; IndexWriter (org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter): 1; BootstrapFileMapping (org.apache.hudi.common.model.BootstrapFileMapping): 1; HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId): 1; HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 1; HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 1