use of org.apache.hudi.common.model.HoodieRecordLocation in project hudi by apache.
the class WriteProfile method smallFilesProfile.
/**
 * Lists the small base files of the given partition path, looking at the latest base files
 * on or before the last completed commit.
 *
 * <p>A base file counts as small when its size is strictly positive and strictly below
 * {@code config.getParquetSmallFileLimit()}.
 *
 * @param partitionPath the partition path to inspect
 * @return the small files found; empty when the commits timeline has no completed instant
 */
protected List<SmallFile> smallFilesProfile(String partitionPath) {
  List<SmallFile> smallFiles = new ArrayList<>();
  HoodieTimeline completedCommits = metaClient.getCommitsTimeline().filterCompletedInstants();
  if (completedCommits.empty()) {
    // no completed commit yet, hence nothing to profile
    return smallFiles;
  }

  HoodieInstant lastCompleted = completedCommits.lastInstant().get();
  List<HoodieBaseFile> latestBaseFiles = fsView
      .getLatestBaseFilesBeforeOrOn(partitionPath, lastCompleted.getTimestamp())
      .collect(Collectors.toList());

  for (HoodieBaseFile baseFile : latestBaseFiles) {
    // skip files that are already large enough, and zero-sized (corrupted) files
    if (baseFile.getFileSize() >= config.getParquetSmallFileLimit() || baseFile.getFileSize() <= 0) {
      continue;
    }
    String baseFileName = baseFile.getFileName();
    SmallFile candidate = new SmallFile();
    // commit time and file id are both parsed from the file name
    candidate.location = new HoodieRecordLocation(FSUtils.getCommitTime(baseFileName), FSUtils.getFileId(baseFileName));
    candidate.sizeBytes = baseFile.getFileSize();
    smallFiles.add(candidate);
  }
  return smallFiles;
}
use of org.apache.hudi.common.model.HoodieRecordLocation in project hudi by apache.
the class HoodieAppendHandle method writeToBuffer.
/**
 * Buffers a single record for this handle.
 *
 * <p>A record whose partition path differs from this handle's {@code partitionPath} is marked
 * as a failure on the write status and is not buffered or counted. Otherwise the record's new
 * location is set when {@code needsUpdateLocation()} says so, the record is converted through
 * {@code getIndexedRecord}, and then either added to {@code recordList}, skipped when it
 * equals {@code IGNORE_RECORD}, or has its key added to {@code keysToDelete} when the
 * conversion yields nothing.
 *
 * @param record the record to buffer
 */
private void writeToBuffer(HoodieRecord<T> record) {
  boolean samePartition = partitionPath.equals(record.getPartitionPath());
  if (!samePartition) {
    String reason = "mismatched partition path, record partition: " + record.getPartitionPath() + " but trying to insert into partition: " + partitionPath;
    writeStatus.markFailure(record, new HoodieUpsertException(reason), record.getData().getMetadata());
    return;
  }

  // remember where the record is going, so it can be found next time
  if (needsUpdateLocation()) {
    record.unseal();
    record.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
    record.seal();
  }

  Option<IndexedRecord> converted = getIndexedRecord(record);
  if (!converted.isPresent()) {
    // nothing to write for this record: track its key as a delete
    keysToDelete.add(record.getKey());
  } else if (!converted.get().equals(IGNORE_RECORD)) {
    // the ignore marker is counted below but never buffered
    recordList.add(converted.get());
  }
  numberOfRecords++;
}
use of org.apache.hudi.common.model.HoodieRecordLocation in project hudi by apache.
the class HoodieConcatHandle method writeIncomingRecords.
/**
 * Drains {@code recordItr}, handing every record to {@code writeInsertRecord}.
 *
 * <p>Before a record is written, its new location is set to this handle's
 * {@code instantTime} and {@code fileId} whenever {@code needsUpdateLocation()} returns true.
 *
 * @throws IOException declared by this method; the only call here that is not a plain
 *         accessor or mutator is {@code writeInsertRecord}
 */
@Override
protected void writeIncomingRecords() throws IOException {
  while (recordItr.hasNext()) {
    final HoodieRecord<T> incoming = recordItr.next();
    if (needsUpdateLocation()) {
      // the record has to be unsealed while its location is changed
      incoming.unseal();
      final HoodieRecordLocation target = new HoodieRecordLocation(instantTime, fileId);
      incoming.setNewLocation(target);
      incoming.seal();
    }
    writeInsertRecord(incoming);
  }
}
use of org.apache.hudi.common.model.HoodieRecordLocation in project hudi by apache.
the class HoodieCreateHandle method write.
/**
 * Writes the given record into the backing file and reports the outcome on the write status.
 *
 * <p>A record whose operation is a delete is handled as if no avro record was supplied: it
 * only bumps the deleted counter. A record equal to {@code IGNORE_RECORD} returns immediately
 * and is neither written nor marked. Any {@link Throwable} raised while writing is recorded
 * as a failure of this single record and logged, rather than rethrown.
 *
 * @param record the hoodie record being written
 * @param avroRecord the avro form of the record, or empty when there is nothing to write
 */
@Override
public void write(HoodieRecord record, Option<IndexedRecord> avroRecord) {
  Option recordMetadata = ((HoodieRecordPayload) record.getData()).getMetadata();
  if (HoodieOperation.isDelete(record.getOperation())) {
    // deletes never carry a payload into the file
    avroRecord = Option.empty();
  }
  try {
    if (!avroRecord.isPresent()) {
      recordsDeleted++;
    } else {
      IndexedRecord incoming = avroRecord.get();
      if (incoming.equals(IGNORE_RECORD)) {
        return;
      }
      // Convert GenericRecord to GenericRecord with hoodie commit metadata in schema
      IndexedRecord enriched = rewriteRecord((GenericRecord) incoming);
      if (!preserveHoodieMetadata) {
        fileWriter.writeAvroWithMetadata(enriched, record);
      } else {
        // the file name field is not preserved: overwrite it with this file's name
        enriched.put(HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.FILENAME_METADATA_FIELD), path.getName());
        fileWriter.writeAvro(record.getRecordKey(), enriched);
      }
      // remember where the record now lives, so it can be found next time
      record.unseal();
      record.setNewLocation(new HoodieRecordLocation(instantTime, writeStatus.getFileId()));
      record.seal();
      recordsWritten++;
      insertRecordsWritten++;
    }
    writeStatus.markSuccess(record, recordMetadata);
    // deflate only after success has been recorded, so the payload is still
    // accessible while the record is being marked successful
    record.deflate();
  } catch (Throwable failure) {
    // a single bad record must not fail the entire job, so nothing is rethrown here
    writeStatus.markFailure(record, failure, recordMetadata);
    LOG.error("Error writing record " + record, failure);
  }
}
use of org.apache.hudi.common.model.HoodieRecordLocation in project hudi by apache.
the class HoodieKeyLocationFetchHandle method locations.
/**
 * Streams every key fetched from this handle's base file, paired with a record location
 * built from that base file's commit time and file id.
 *
 * <p>When {@code keyGeneratorOpt} is present it is passed on to the key fetch; otherwise the
 * fetch overload without a key generator is used.
 *
 * @return a stream of (key, location) pairs, one per fetched key
 */
public Stream<Pair<HoodieKey, HoodieRecordLocation>> locations() {
  HoodieBaseFile baseFile = partitionPathBaseFilePair.getRight();
  BaseFileUtils fileUtils = BaseFileUtils.getInstance(baseFile.getPath());
  List<HoodieKey> keys = keyGeneratorOpt.isPresent()
      ? fileUtils.fetchHoodieKeys(hoodieTable.getHadoopConf(), new Path(baseFile.getPath()), keyGeneratorOpt)
      : fileUtils.fetchHoodieKeys(hoodieTable.getHadoopConf(), new Path(baseFile.getPath()));
  return keys.stream()
      .map(key -> Pair.of(key, new HoodieRecordLocation(baseFile.getCommitTime(), baseFile.getFileId())));
}
Aggregations