use of org.apache.hudi.common.model.HoodieRecordLocation in project hudi by apache.
the class HoodieMergeHandle method init.
/**
 * Load the new incoming records into a map, keyed by record key.
 */
protected void init(String fileId, Iterator<HoodieRecord<T>> newRecordsItr) {
  initializeIncomingRecordsMap();
  while (newRecordsItr.hasNext()) {
    HoodieRecord<T> record = newRecordsItr.next();
    // update the new location of the record, so we know where to find it next
    if (needsUpdateLocation()) {
      record.unseal();
      record.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
      record.seal();
    }
    // NOTE: Once records are added to the map (spillable map), DO NOT mutate them, as the changes won't persist
    keyToNewRecords.put(record.getRecordKey(), record);
  }
  LOG.info("Number of entries in MemoryBasedMap => " + ((ExternalSpillableMap) keyToNewRecords).getInMemoryMapNumEntries()
      + ", Total size in bytes of MemoryBasedMap => " + ((ExternalSpillableMap) keyToNewRecords).getCurrentInMemoryMapSize()
      + ", Number of entries in BitCaskDiskMap => " + ((ExternalSpillableMap) keyToNewRecords).getDiskBasedMapNumEntries()
      + ", Size of file spilled to disk => " + ((ExternalSpillableMap) keyToNewRecords).getSizeOfFileOnDiskInBytes());
}
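For orientation: a HoodieRecordLocation is just the (instant time, file ID) pair that pins a record to a file slice, which is why the handle can tag every incoming record with the current instant and target file up front. A minimal sketch of the tag-and-read-back pattern, assuming an already-built HoodieRecord<T> named record; the instant and file ID values are hypothetical:

// Records are sealed by default; unseal before changing the location, then re-seal.
record.unseal();
record.setNewLocation(new HoodieRecordLocation("20220101120000", "file-0001")); // hypothetical values
record.seal();

// The tagged location can later be read back to find the record's target file.
HoodieRecordLocation loc = record.getNewLocation().get(); // Option is present once tagged
String instant = loc.getInstantTime(); // "20220101120000"
String fileId = loc.getFileId();       // "file-0001"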
use of org.apache.hudi.common.model.HoodieRecordLocation in project hudi by apache.
the class ListBasedHoodieBloomIndexHelper method findMatchingFilesForRecordKeys.
@Override
public HoodiePairData<HoodieKey, HoodieRecordLocation> findMatchingFilesForRecordKeys(
    HoodieWriteConfig config, HoodieEngineContext context, HoodieTable hoodieTable,
    HoodiePairData<String, String> partitionRecordKeyPairs,
    HoodieData<Pair<String, HoodieKey>> fileComparisonPairs,
    Map<String, List<BloomIndexFileInfo>> partitionToFileInfo,
    Map<String, Long> recordsPerPartition) {
  List<Pair<String, HoodieKey>> fileComparisonPairList =
      HoodieList.getList(fileComparisonPairs).stream()
          .sorted(Comparator.comparing(Pair::getLeft))
          .collect(toList());
  List<HoodieKeyLookupResult> keyLookupResults = new ArrayList<>();
  Iterator<List<HoodieKeyLookupResult>> iterator =
      new HoodieBaseBloomIndexCheckFunction(hoodieTable, config).apply(fileComparisonPairList.iterator());
  while (iterator.hasNext()) {
    keyLookupResults.addAll(iterator.next());
  }
  keyLookupResults = keyLookupResults.stream()
      .filter(lr -> !lr.getMatchingRecordKeys().isEmpty())
      .collect(toList());
  return context.parallelize(keyLookupResults)
      .flatMap(lookupResult -> lookupResult.getMatchingRecordKeys().stream()
          .map(recordKey -> new ImmutablePair<>(lookupResult, recordKey))
          .iterator())
      .mapToPair(pair -> {
        HoodieKeyLookupResult lookupResult = pair.getLeft();
        String recordKey = pair.getRight();
        return new ImmutablePair<>(
            new HoodieKey(recordKey, lookupResult.getPartitionPath()),
            new HoodieRecordLocation(lookupResult.getBaseInstantTime(), lookupResult.getFileId()));
      });
}
}
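The shape of the output is worth spelling out: each lookup result fans out into one (HoodieKey, HoodieRecordLocation) pair per matched record key. A hand-built illustration of a single pair; the key, partition, instant, and file ID values are made up:

// A file "f1" with base instant "001" that matched record key "k1" in partition "p"
// contributes exactly this pair; a result with N matching keys contributes N such pairs.
HoodieKey key = new HoodieKey("k1", "p");
HoodieRecordLocation location = new HoodieRecordLocation("001", "f1");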
use of org.apache.hudi.common.model.HoodieRecordLocation in project hudi by apache.
the class HoodieInMemoryHashIndex method updateLocation.
@Override
public HoodieData<WriteStatus> updateLocation(HoodieData<WriteStatus> writeStatuses, HoodieEngineContext context, HoodieTable hoodieTable) {
return writeStatuses.map(writeStatus -> {
  for (HoodieRecord record : writeStatus.getWrittenRecords()) {
    if (!writeStatus.isErrored(record.getKey())) {
      HoodieKey key = record.getKey();
      Option<HoodieRecordLocation> newLocation = record.getNewLocation();
      if (newLocation.isPresent()) {
        recordLocationMap.put(key, newLocation.get());
      } else {
        // Delete existing index for a deleted record
        recordLocationMap.remove(key);
      }
    }
  }
  return writeStatus;
});
}
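The read side of this index is a plain map lookup. A minimal sketch, assuming recordLocationMap is a ConcurrentHashMap<HoodieKey, HoodieRecordLocation> as the class above suggests (the key and routing comments are hypothetical):

HoodieRecordLocation existing = recordLocationMap.get(new HoodieKey("k1", "p"));
if (existing != null) {
  // Key seen before: route the incoming record as an update to file existing.getFileId(),
  // whose base file was written at instant existing.getInstantTime().
} else {
  // Unknown key: treat the record as an insert.
}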
use of org.apache.hudi.common.model.HoodieRecordLocation in project hudi by apache.
the class HoodieFlinkWriteableTestTable method appendRecordsToLogFile.
private Pair<String, HoodieLogFile> appendRecordsToLogFile(List<HoodieRecord> groupedRecords) throws Exception {
  String partitionPath = groupedRecords.get(0).getPartitionPath();
  HoodieRecordLocation location = groupedRecords.get(0).getCurrentLocation();
  try (HoodieLogFormat.Writer logWriter = HoodieLogFormat.newWriterBuilder()
      .onParentPath(new Path(basePath, partitionPath))
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
      .withFileId(location.getFileId())
      .overBaseCommit(location.getInstantTime())
      .withFs(fs)
      .build()) {
    Map<HeaderMetadataType, String> header = new java.util.HashMap<>();
    header.put(HeaderMetadataType.INSTANT_TIME, location.getInstantTime());
    header.put(HeaderMetadataType.SCHEMA, schema.toString());
    logWriter.appendBlock(new HoodieAvroDataBlock(groupedRecords.stream().map(r -> {
      try {
        GenericRecord val = (GenericRecord) ((HoodieRecordPayload) r.getData()).getInsertValue(schema).get();
        HoodieAvroUtils.addHoodieKeyToRecord(val, r.getRecordKey(), r.getPartitionPath(), "");
        return (IndexedRecord) val;
      } catch (IOException e) {
        LOG.warn("Failed to convert record " + r.toString(), e);
        return null;
      }
    }).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD));
    return Pair.of(partitionPath, logWriter.getLogFile());
  }
}
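Note the precondition: the helper derives the log file's fileId and base commit from the first record's current location, so every record must be tagged before the call. A minimal sketch of that tagging, with hypothetical instant and file ID values:

for (HoodieRecord record : groupedRecords) {
  record.unseal();
  record.setCurrentLocation(new HoodieRecordLocation("20220101120000", "file-0001")); // hypothetical values
  record.seal();
}
// appendRecordsToLogFile(groupedRecords) can now safely read
// groupedRecords.get(0).getCurrentLocation().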
use of org.apache.hudi.common.model.HoodieRecordLocation in project hudi by apache.
the class BaseJavaCommitActionExecutor method buildProfile.
protected Pair<HashMap<String, WorkloadStat>, WorkloadStat> buildProfile(List<HoodieRecord<T>> inputRecords) {
  HashMap<String, WorkloadStat> partitionPathStatMap = new HashMap<>();
  WorkloadStat globalStat = new WorkloadStat();
  Map<Pair<String, Option<HoodieRecordLocation>>, Long> partitionLocationCounts = inputRecords.stream()
      .map(record -> Pair.of(
          Pair.of(record.getPartitionPath(), Option.ofNullable(record.getCurrentLocation())), record))
      .collect(Collectors.groupingBy(Pair::getLeft, Collectors.counting()));
  for (Map.Entry<Pair<String, Option<HoodieRecordLocation>>, Long> e : partitionLocationCounts.entrySet()) {
    String partitionPath = e.getKey().getLeft();
    Long count = e.getValue();
    Option<HoodieRecordLocation> locOption = e.getKey().getRight();
    if (!partitionPathStatMap.containsKey(partitionPath)) {
      partitionPathStatMap.put(partitionPath, new WorkloadStat());
    }
    if (locOption.isPresent()) {
      // update
      partitionPathStatMap.get(partitionPath).addUpdates(locOption.get(), count);
      globalStat.addUpdates(locOption.get(), count);
    } else {
      // insert
      partitionPathStatMap.get(partitionPath).addInserts(count);
      globalStat.addInserts(count);
    }
  }
  return Pair.of(partitionPathStatMap, globalStat);
}
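The grouping key is (partitionPath, Option<currentLocation>), so tagged and untagged records in the same partition are counted separately: records that already carry a HoodieRecordLocation become updates against that location, and untagged records become inserts. A short usage sketch, assuming WorkloadStat exposes getNumUpdates()/getNumInserts() accessors and using a hypothetical partition path:

Pair<HashMap<String, WorkloadStat>, WorkloadStat> profile = buildProfile(inputRecords);
WorkloadStat stat = profile.getLeft().get("2021/01/01"); // hypothetical partition
long updates = stat.getNumUpdates(); // records that carried a current location
long inserts = stat.getNumInserts(); // records with no current location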