Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache: class TestFlinkHoodieBloomIndex, method testLoadInvolvedFiles.
@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testLoadInvolvedFiles(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws Exception {
  HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
  HoodieBloomIndex index = new HoodieBloomIndex(config, ListBasedHoodieBloomIndexHelper.getInstance());
  HoodieTable hoodieTable = HoodieFlinkTable.create(config, context, metaClient, false);
  HoodieFlinkWriteableTestTable testTable = HoodieFlinkWriteableTestTable.of(hoodieTable, SCHEMA);
  // Create some partitions and put some files in them:
  // "2016/01/21": 0 files
  // "2016/04/01": 1 file (2_0_20160401010101.parquet)
  // "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet, 4_0_20150312101010.parquet)
  testTable.withPartitionMetaFiles("2016/01/21", "2016/04/01", "2015/03/12");
  RawTripTestPayload rowChange1 = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
  HoodieRecord record1 = new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1);
  RawTripTestPayload rowChange2 = new RawTripTestPayload("{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
  HoodieRecord record2 = new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2);
  RawTripTestPayload rowChange3 = new RawTripTestPayload("{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
  HoodieRecord record3 = new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3);
  RawTripTestPayload rowChange4 = new RawTripTestPayload("{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
  HoodieRecord record4 = new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
  List<String> partitions = asList("2016/01/21", "2016/04/01", "2015/03/12");
  List<Pair<String, BloomIndexFileInfo>> filesList = index.loadColumnRangesFromFiles(partitions, context, hoodieTable);
  // Still 0, as no valid commit exists yet
  assertEquals(0, filesList.size());
  testTable.addCommit("20160401010101").withInserts("2016/04/01", "2");
  testTable.addCommit("20150312101010")
      .withInserts("2015/03/12", "1")
      .withInserts("2015/03/12", "3", record1)
      .withInserts("2015/03/12", "4", record2, record3, record4);
  metaClient.reloadActiveTimeline();
  filesList = index.loadColumnRangesFromFiles(partitions, context, hoodieTable);
  assertEquals(4, filesList.size());
  if (rangePruning) {
    // The first two files carry no key ranges; the files written with records do.
    assertNull(filesList.get(0).getRight().getMaxRecordKey());
    assertNull(filesList.get(0).getRight().getMinRecordKey());
    assertFalse(filesList.get(1).getRight().hasKeyRanges());
    assertNotNull(filesList.get(2).getRight().getMaxRecordKey());
    assertNotNull(filesList.get(2).getRight().getMinRecordKey());
    assertTrue(filesList.get(3).getRight().hasKeyRanges());
    // No longer sorted, but should contain the same files.
    List<Pair<String, BloomIndexFileInfo>> expected = asList(
        Pair.of("2016/04/01", new BloomIndexFileInfo("2")),
        Pair.of("2015/03/12", new BloomIndexFileInfo("1")),
        Pair.of("2015/03/12", new BloomIndexFileInfo("3", "000", "000")),
        Pair.of("2015/03/12", new BloomIndexFileInfo("4", "001", "003")));
    assertEquals(expected, filesList);
  }
}
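The HoodieAvroRecord construction exercised above reduces to a small pattern: build a HoodieKey from the payload's record key and partition path, then wrap the payload. A minimal sketch of just that step, reusing the RawTripTestPayload class from the snippet; the JSON values are illustrative, and the code is assumed to sit inside a test method that declares throws Exception:

// A HoodieAvroRecord pairs a HoodieKey (record key + partition path) with an Avro-backed payload.
RawTripTestPayload payload = new RawTripTestPayload("{\"_row_key\":\"004\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}");
HoodieRecord record = new HoodieAvroRecord(new HoodieKey(payload.getRowKey(), payload.getPartitionPath()), payload);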
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache: class JavaDeleteHelper, method execute.
@Override
public HoodieWriteMetadata<List<WriteStatus>> execute(String instantTime, List<HoodieKey> keys, HoodieEngineContext context, HoodieWriteConfig config,
    HoodieTable<EmptyHoodieRecordPayload, List<HoodieRecord<EmptyHoodieRecordPayload>>, List<HoodieKey>, List<WriteStatus>> table,
    BaseCommitActionExecutor<EmptyHoodieRecordPayload, List<HoodieRecord<EmptyHoodieRecordPayload>>, List<HoodieKey>, List<WriteStatus>, R> deleteExecutor) {
  try {
    HoodieWriteMetadata<List<WriteStatus>> result = null;
    List<HoodieKey> dedupedKeys = keys;
    final int parallelism = config.getDeleteShuffleParallelism();
    if (config.shouldCombineBeforeDelete()) {
      // De-dupe/merge if needed
      dedupedKeys = deduplicateKeys(keys, table, parallelism);
    }
    List<HoodieRecord<EmptyHoodieRecordPayload>> dedupedRecords =
        dedupedKeys.stream().map(key -> new HoodieAvroRecord<>(key, new EmptyHoodieRecordPayload())).collect(Collectors.toList());
    Instant beginTag = Instant.now();
    // Perform an index lookup to get the existing location of the records
    List<HoodieRecord<EmptyHoodieRecordPayload>> taggedRecords =
        HoodieList.getList(table.getIndex().tagLocation(HoodieList.of(dedupedRecords), context, table));
    Duration tagLocationDuration = Duration.between(beginTag, Instant.now());
    // Filter out non-existent keys/records
    List<HoodieRecord<EmptyHoodieRecordPayload>> taggedValidRecords =
        taggedRecords.stream().filter(HoodieRecord::isCurrentLocationKnown).collect(Collectors.toList());
    if (!taggedValidRecords.isEmpty()) {
      result = deleteExecutor.execute(taggedValidRecords);
      result.setIndexLookupDuration(tagLocationDuration);
    } else {
      // The entire set of keys is non-existent
      deleteExecutor.saveWorkloadProfileMetadataToInflight(new WorkloadProfile(Pair.of(new HashMap<>(), new WorkloadStat())), instantTime);
      result = new HoodieWriteMetadata<>();
      result.setWriteStatuses(Collections.EMPTY_LIST);
      deleteExecutor.commitOnAutoCommit(result);
    }
    return result;
  } catch (Throwable e) {
    if (e instanceof HoodieUpsertException) {
      throw (HoodieUpsertException) e;
    }
    throw new HoodieUpsertException("Failed to delete for commit time " + instantTime, e);
  }
}
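The HoodieAvroRecord usage in this helper is how deletes are modeled: each key to delete is wrapped with an EmptyHoodieRecordPayload, so the downstream write path sees a record with no data. A minimal sketch of just that mapping, with a hypothetical keysToDelete list (and an illustrative key and partition path) standing in for the deduplicated keys above:

// keysToDelete is a placeholder; in the helper above it is the deduplicated key list.
List<HoodieKey> keysToDelete = Collections.singletonList(new HoodieKey("000", "2015/03/12"));
// Wrap every key in a HoodieAvroRecord carrying an empty payload to mark it for deletion.
List<HoodieRecord<EmptyHoodieRecordPayload>> deleteRecords = keysToDelete.stream()
    .map(key -> new HoodieAvroRecord<>(key, new EmptyHoodieRecordPayload()))
    .collect(Collectors.toList());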
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache: class JavaExecutionStrategy, method transform.
/**
 * Transform IndexedRecord into HoodieRecord.
 */
private HoodieRecord<T> transform(IndexedRecord indexedRecord) {
  GenericRecord record = (GenericRecord) indexedRecord;
  Option<BaseKeyGenerator> keyGeneratorOpt = Option.empty();
  String key = KeyGenUtils.getRecordKeyFromGenericRecord(record, keyGeneratorOpt);
  String partition = KeyGenUtils.getPartitionPathFromGenericRecord(record, keyGeneratorOpt);
  HoodieKey hoodieKey = new HoodieKey(key, partition);
  HoodieRecordPayload avroPayload = new RewriteAvroPayload(record);
  HoodieRecord hoodieRecord = new HoodieAvroRecord(hoodieKey, avroPayload);
  return hoodieRecord;
}
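Because keyGeneratorOpt is empty here, the record key and partition path are taken from the Hudi metadata fields already present on the clustered records rather than from a configured key generator. A hedged sketch of how this transform is typically applied over the Avro records of a file group inside the same class; readAvroRecordsForGroup is a hypothetical supplier, not a JavaExecutionStrategy method:

// Convert every Avro record read from the file group into a HoodieRecord for rewriting.
Iterator<IndexedRecord> avroRecords = readAvroRecordsForGroup(); // hypothetical supplier of Avro records
List<HoodieRecord<T>> hoodieRecords = new ArrayList<>();
while (avroRecords.hasNext()) {
  hoodieRecords.add(transform(avroRecords.next()));
}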
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache: class TestBitCaskDiskMap, method testPutAll.
@ParameterizedTest
@ValueSource(booleans = { false, true })
public void testPutAll(boolean isCompressionEnabled) throws IOException, URISyntaxException {
  BitCaskDiskMap<String, HoodieRecord> records = new BitCaskDiskMap<>(basePath, isCompressionEnabled);
  List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  Map<String, HoodieRecord> recordMap = new HashMap<>();
  iRecords.forEach(r -> {
    String key = ((GenericRecord) r).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
    String partitionPath = ((GenericRecord) r).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
    HoodieRecord value = new HoodieAvroRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r)));
    recordMap.put(key, value);
  });
  records.putAll(recordMap);
  // make sure records have spilled to disk
  assertTrue(records.sizeOfFileOnDiskInBytes() > 0);
  // make sure all added records are present
  for (Map.Entry<String, HoodieRecord> entry : records.entrySet()) {
    assertTrue(recordMap.containsKey(entry.getKey()));
  }
}
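Since BitCaskDiskMap behaves like a Map, a spilled record can be read back by key and its HoodieAvroPayload unwrapped again. A minimal sketch, assuming the test records were generated with the simple test schema plus the Hudi metadata fields (as in the spillable-map test below) and that the enclosing test method declares the checked exceptions:

// Read one record back from the disk map and recover the original Avro record from its payload.
Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
String someKey = recordMap.keySet().iterator().next();
HoodieRecord stored = records.get(someKey);
GenericRecord avro = (GenericRecord) ((HoodieAvroPayload) stored.getData()).getInsertValue(schema).get();
assertEquals(someKey, avro.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString());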
Use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache: class TestExternalSpillableMap, method testDataCorrectnessWithUpsertsToDataInMapAndOnDisk.
@ParameterizedTest
@MethodSource("testArguments")
public void testDataCorrectnessWithUpsertsToDataInMapAndOnDisk(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws IOException, URISyntaxException {
  Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
  ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
      new ExternalSpillableMap<>(16L /* 16B in-memory budget */, basePath, new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema), diskMapType, isCompressionEnabled);
  List<String> recordKeys = new ArrayList<>();
  // Ensure we spill to disk
  while (records.getDiskBasedMapNumEntries() < 1) {
    List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
    recordKeys.addAll(SpillableMapTestUtils.upsertRecords(iRecords, records));
  }
  // Get a record from the in-memory map
  String key = recordKeys.get(0);
  HoodieAvroRecord record = (HoodieAvroRecord) records.get(key);
  List<IndexedRecord> recordsToUpdate = new ArrayList<>();
  recordsToUpdate.add((IndexedRecord) record.getData().getInsertValue(schema).get());
  String newCommitTime = HoodieActiveTimeline.createNewInstantTime();
  List<String> keysToBeUpdated = new ArrayList<>();
  keysToBeUpdated.add(key);
  // Update the commit time for this record
  List<IndexedRecord> updatedRecords = SchemaTestUtil.updateHoodieTestRecords(keysToBeUpdated, recordsToUpdate, newCommitTime);
  // Upsert this updated record
  SpillableMapTestUtils.upsertRecords(updatedRecords, records);
  GenericRecord gRecord = (GenericRecord) records.get(key).getData().getInsertValue(schema).get();
  // The record returned for this key should have the updated commit time
  assert newCommitTime.contentEquals(gRecord.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString());
  // Get a record from the disk-based map
  key = recordKeys.get(recordKeys.size() - 1);
  record = (HoodieAvroRecord) records.get(key);
  recordsToUpdate = new ArrayList<>();
  recordsToUpdate.add((IndexedRecord) record.getData().getInsertValue(schema).get());
  newCommitTime = HoodieActiveTimeline.createNewInstantTime();
  keysToBeUpdated = new ArrayList<>();
  keysToBeUpdated.add(key);
  // Update the commit time for this record
  updatedRecords = SchemaTestUtil.updateHoodieTestRecords(keysToBeUpdated, recordsToUpdate, newCommitTime);
  // Upsert this updated record
  SpillableMapTestUtils.upsertRecords(updatedRecords, records);
  gRecord = (GenericRecord) records.get(key).getData().getInsertValue(schema).get();
  // The record returned for this key should have the updated commit time
  assert newCommitTime.contentEquals(gRecord.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString());
}
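After both upserts it can be worth confirming that entries still live in both tiers of the spillable map. A short hedged sketch; getInMemoryMapNumEntries is assumed to be available alongside the getDiskBasedMapNumEntries accessor used above, and the size check assumes the generated test record keys are unique:

// Entries should remain split across the in-memory map and the spill file.
assertTrue(records.getInMemoryMapNumEntries() > 0);
assertTrue(records.getDiskBasedMapNumEntries() > 0);
assertEquals(recordKeys.size(), records.size()); // holds if generated record keys are unique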