Use of org.apache.hudi.index.bloom.HoodieBloomIndex in project hudi by apache.
From the class TestHoodieCompactor, method testWriteStatusContentsAfterCompaction.
@Test
public void testWriteStatusContentsAfterCompaction() throws Exception {
  // Insert 100 records, configuring compaction to trigger after every delta commit
  HoodieWriteConfig config = getConfigBuilder()
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().withMaxNumDeltaCommitsBeforeCompaction(1).build())
      .build();
  try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config)) {
    String newCommitTime = "100";
    writeClient.startCommitWithTime(newCommitTime);
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 100);
    JavaRDD<HoodieRecord> recordsRDD = jsc.parallelize(records, 1);
    writeClient.insert(recordsRDD, newCommitTime).collect();

    // Update all 100 records, tagging their file locations through the bloom index first
    HoodieTable table = HoodieSparkTable.create(config, context);
    newCommitTime = "101";
    List<HoodieRecord> updatedRecords = dataGen.generateUpdates(newCommitTime, records);
    JavaRDD<HoodieRecord> updatedRecordsRDD = jsc.parallelize(updatedRecords, 1);
    HoodieIndex index = new HoodieBloomIndex(config, SparkHoodieBloomIndexHelper.getInstance());
    JavaRDD<HoodieRecord> updatedTaggedRecordsRDD = tagLocation(index, updatedRecordsRDD, table);
    writeClient.startCommitWithTime(newCommitTime);
    writeClient.upsertPreppedRecords(updatedTaggedRecordsRDD, newCommitTime).collect();
    metaClient.reloadActiveTimeline();

    // Verify that every data file has exactly one log file
    table = HoodieSparkTable.create(config, context);
    for (String partitionPath : dataGen.getPartitionPaths()) {
      List<FileSlice> groupedLogFiles = table.getSliceView().getLatestFileSlices(partitionPath).collect(Collectors.toList());
      for (FileSlice fileSlice : groupedLogFiles) {
        assertEquals(1, fileSlice.getLogFiles().count(), "There should be 1 log file written for every data file");
      }
    }

    // Do a compaction
    table = HoodieSparkTable.create(config, context);
    String compactionInstantTime = "102";
    table.scheduleCompaction(context, compactionInstantTime, Option.empty());
    table.getMetaClient().reloadActiveTimeline();
    HoodieData<WriteStatus> result = (HoodieData<WriteStatus>) table.compact(context, compactionInstantTime).getWriteStatuses();

    // Verify that all partition paths are present in the WriteStatus result
    // (collect once, outside the loop, rather than per partition)
    List<WriteStatus> writeStatuses = result.collectAsList();
    for (String partitionPath : dataGen.getPartitionPaths()) {
      assertTrue(writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPartitionPath().contentEquals(partitionPath)));
    }
  }
}
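The tagLocation call above is a convenience helper from the test's base class, not part of the HoodieBloomIndex API itself. As a rough sketch of what such a helper can look like, assuming the HoodieData-based HoodieIndex#tagLocation signature and the HoodieJavaRDD wrapper that ship alongside SparkHoodieBloomIndexHelper (the real helper's body may differ):

// Hypothetical helper: wrap the raw RDD in Hudi's engine-agnostic HoodieData,
// let the bloom index look up the existing file location for each record key,
// then unwrap the tagged result back into a JavaRDD.
private JavaRDD<HoodieRecord> tagLocation(HoodieIndex index, JavaRDD<HoodieRecord> records, HoodieTable table) {
  return HoodieJavaRDD.getJavaRDD(index.tagLocation(HoodieJavaRDD.of(records), context, table));
}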
Use of org.apache.hudi.index.bloom.HoodieBloomIndex in project hudi by apache.
From the class TestHoodieIndexConfigs, method testCreateIndex.
@ParameterizedTest
@EnumSource(value = IndexType.class, names = { "INMEMORY", "BLOOM", "GLOBAL_BLOOM", "SIMPLE", "GLOBAL_SIMPLE", "HBASE", "BUCKET" })
public void testCreateIndex(IndexType indexType) {
  HoodieWriteConfig config;
  HoodieWriteConfig.Builder clientConfigBuilder = HoodieWriteConfig.newBuilder();
  HoodieIndexConfig.Builder indexConfigBuilder = HoodieIndexConfig.newBuilder();
  switch (indexType) {
    case INMEMORY:
      config = clientConfigBuilder.withPath(basePath)
          .withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build();
      assertTrue(SparkHoodieIndexFactory.createIndex(config) instanceof HoodieInMemoryHashIndex);
      break;
    case BLOOM:
      config = clientConfigBuilder.withPath(basePath)
          .withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
      assertTrue(SparkHoodieIndexFactory.createIndex(config) instanceof HoodieBloomIndex);
      break;
    case GLOBAL_BLOOM:
      config = clientConfigBuilder.withPath(basePath)
          .withIndexConfig(indexConfigBuilder.withIndexType(IndexType.GLOBAL_BLOOM).build()).build();
      assertTrue(SparkHoodieIndexFactory.createIndex(config) instanceof HoodieGlobalBloomIndex);
      break;
    case SIMPLE:
      config = clientConfigBuilder.withPath(basePath)
          .withIndexConfig(indexConfigBuilder.withIndexType(IndexType.SIMPLE).build()).build();
      assertTrue(SparkHoodieIndexFactory.createIndex(config) instanceof HoodieSimpleIndex);
      break;
    case GLOBAL_SIMPLE:
      config = clientConfigBuilder.withPath(basePath)
          .withIndexConfig(indexConfigBuilder.withIndexType(IndexType.GLOBAL_SIMPLE).build()).build();
      assertTrue(SparkHoodieIndexFactory.createIndex(config) instanceof HoodieGlobalSimpleIndex);
      break;
    case HBASE:
      config = clientConfigBuilder.withPath(basePath)
          .withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.HBASE)
              .withHBaseIndexConfig(new HoodieHBaseIndexConfig.Builder().build()).build()).build();
      assertTrue(SparkHoodieIndexFactory.createIndex(config) instanceof SparkHoodieHBaseIndex);
      break;
    case BUCKET:
      config = clientConfigBuilder.withPath(basePath)
          .withIndexConfig(indexConfigBuilder.withIndexType(IndexType.BUCKET).build()).build();
      assertTrue(SparkHoodieIndexFactory.createIndex(config) instanceof HoodieBucketIndex);
      break;
    default:
      // no other index types are supplied by the @EnumSource above
  }
}
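Outside of tests, the same config-driven path shown above is how a bloom index is obtained. A minimal sketch, assuming only a valid table basePath:

// Build a write config that selects the BLOOM index type, then let the Spark
// factory instantiate the matching implementation (a HoodieBloomIndex here).
HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withIndexConfig(HoodieIndexConfig.newBuilder()
        .withIndexType(HoodieIndex.IndexType.BLOOM)
        .build())
    .build();
HoodieIndex index = SparkHoodieIndexFactory.createIndex(config);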