Search in sources :

Example 6 with HoodieRecordLocation

Use of org.apache.hudi.common.model.HoodieRecordLocation in the Apache Hudi project.

Class WriteProfile, method smallFilesProfile.

/**
 * Builds the small-file profile of a single partition from the latest filesystem view.
 *
 * <p>Only the latest base files as of the last completed commit are considered. A file is kept
 * when its size is strictly positive and strictly below the configured parquet small-file limit.
 *
 * @param partitionPath the partition whose base files are inspected
 * @return the small files found; empty when the commit timeline has no completed instant
 */
protected List<SmallFile> smallFilesProfile(String partitionPath) {
    List<SmallFile> candidates = new ArrayList<>();
    HoodieTimeline completedCommits = metaClient.getCommitsTimeline().filterCompletedInstants();
    if (completedCommits.empty()) {
        // no completed commit yet, so there is nothing to profile
        return candidates;
    }

    HoodieInstant lastCompleted = completedCommits.lastInstant().get();
    List<HoodieBaseFile> latestBaseFiles = fsView
        .getLatestBaseFilesBeforeOrOn(partitionPath, lastCompleted.getTimestamp())
        .collect(Collectors.toList());

    for (HoodieBaseFile baseFile : latestBaseFiles) {
        // zero-length files are skipped as well: they are treated as corrupted
        boolean isSmall = baseFile.getFileSize() < config.getParquetSmallFileLimit()
            && baseFile.getFileSize() > 0;
        if (!isSmall) {
            continue;
        }
        String baseFileName = baseFile.getFileName();
        SmallFile smallFile = new SmallFile();
        smallFile.location = new HoodieRecordLocation(
            FSUtils.getCommitTime(baseFileName), FSUtils.getFileId(baseFileName));
        smallFile.sizeBytes = baseFile.getFileSize();
        candidates.add(smallFile);
    }
    return candidates;
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) SmallFile(org.apache.hudi.table.action.commit.SmallFile) ArrayList(java.util.ArrayList) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation)

Example 7 with HoodieRecordLocation

Use of org.apache.hudi.common.model.HoodieRecordLocation in the Apache Hudi project.

Class HoodieAppendHandle, method writeToBuffer.

/**
 * Buffers a single record for a later append.
 *
 * <p>A record whose partition path differs from this handle's partition path is marked as failed
 * on the write status and is not buffered or counted. Otherwise the record is either added to
 * the record list, skipped (when it converts to {@code IGNORE_RECORD}), or has its key queued
 * for deletion (when it converts to an empty option).
 *
 * @param record the record to buffer
 */
private void writeToBuffer(HoodieRecord<T> record) {
    String recordPartition = record.getPartitionPath();
    if (!partitionPath.equals(recordPartition)) {
        String message = "mismatched partition path, record partition: " + recordPartition
            + " but trying to insert into partition: " + partitionPath;
        writeStatus.markFailure(record, new HoodieUpsertException(message), record.getData().getMetadata());
        return;
    }

    // stamp the record with its new location before converting it, so it can be found next time
    if (needsUpdateLocation()) {
        record.unseal();
        record.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
        record.seal();
    }

    Option<IndexedRecord> converted = getIndexedRecord(record);
    if (!converted.isPresent()) {
        // an absent payload means the key has to be deleted
        keysToDelete.add(record.getKey());
    } else if (!converted.get().equals(IGNORE_RECORD)) {
        // the ignore marker is dropped silently; everything else is buffered
        recordList.add(converted.get());
    }
    numberOfRecords++;
}
Also used : HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation)

Example 8 with HoodieRecordLocation

Use of org.apache.hudi.common.model.HoodieRecordLocation in the Apache Hudi project.

Class HoodieConcatHandle, method writeIncomingRecords.

/**
 * Drains the incoming record iterator, writing every record as an insert.
 *
 * <p>When {@code needsUpdateLocation()} holds, the record is first stamped with the location
 * made of this handle's instant time and file id.
 *
 * @throws IOException declared by the overridden method
 */
@Override
protected void writeIncomingRecords() throws IOException {
    for (HoodieRecord<T> incoming; recordItr.hasNext(); ) {
        incoming = recordItr.next();
        if (needsUpdateLocation()) {
            // the record is sealed; open it up just long enough to set the new location
            incoming.unseal();
            incoming.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
            incoming.seal();
        }
        writeInsertRecord(incoming);
    }
}
Also used : HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation)

Example 9 with HoodieRecordLocation

Use of org.apache.hudi.common.model.HoodieRecordLocation in the Apache Hudi project.

Class HoodieCreateHandle, method write.

/**
 * Writes one record into the backing file and records the outcome on the write status.
 *
 * <p>A record whose operation is a delete is handled as if no avro payload was supplied and is
 * only counted as deleted. A payload equal to {@code IGNORE_RECORD} makes the method return
 * without marking success or failure. Any {@link Throwable} raised while writing is logged and
 * recorded as a failure for this record instead of being propagated.
 *
 * @param record     the record being written
 * @param avroRecord the avro payload to persist, if any
 */
@Override
public void write(HoodieRecord record, Option<IndexedRecord> avroRecord) {
    Option recordMetadata = ((HoodieRecordPayload) record.getData()).getMetadata();
    // deletes carry no payload to persist, whatever the caller handed in
    Option<IndexedRecord> payload = avroRecord;
    if (HoodieOperation.isDelete(record.getOperation())) {
        payload = Option.empty();
    }
    try {
        if (!payload.isPresent()) {
            recordsDeleted++;
        } else {
            IndexedRecord incoming = payload.get();
            if (incoming.equals(IGNORE_RECORD)) {
                return;
            }
            // Rewrite the record so that its schema carries the hoodie commit metadata fields
            IndexedRecord withMetaFields = rewriteRecord((GenericRecord) incoming);
            if (!preserveHoodieMetadata) {
                fileWriter.writeAvroWithMetadata(withMetaFields, record);
            } else {
                // existing metadata is kept, except the file name which must point at this file
                int fileNamePos = HoodieRecord.HOODIE_META_COLUMNS_NAME_TO_POS.get(HoodieRecord.FILENAME_METADATA_FIELD);
                withMetaFields.put(fileNamePos, path.getName());
                fileWriter.writeAvro(record.getRecordKey(), withMetaFields);
            }
            // remember where the record now lives, so it can be found next time
            record.unseal();
            record.setNewLocation(new HoodieRecordLocation(instantTime, writeStatus.getFileId()));
            record.seal();
            recordsWritten++;
            insertRecordsWritten++;
        }
        writeStatus.markSuccess(record, recordMetadata);
        // Deflate only after success has been recorded, so the payload is still
        // accessible while the record is being marked successful.
        record.deflate();
    } catch (Throwable t) {
        // A single bad record must not fail the entire job: record the failure and move on.
        writeStatus.markFailure(record, t, recordMetadata);
        LOG.error("Error writing record " + record, t);
    }
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) Option(org.apache.hudi.common.util.Option) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload)

Example 10 with HoodieRecordLocation

Use of org.apache.hudi.common.model.HoodieRecordLocation in the Apache Hudi project.

Class HoodieKeyLocationFetchHandle, method locations.

/**
 * Streams every key stored in this handle's base file, paired with that file's location.
 *
 * <p>The keys are fetched from the base file, passing the key generator along when one is
 * configured. Each key is paired with a new {@link HoodieRecordLocation} built from the base
 * file's commit time and file id.
 *
 * @return a stream of (key, location) pairs, one per key fetched from the base file
 */
public Stream<Pair<HoodieKey, HoodieRecordLocation>> locations() {
    HoodieBaseFile baseFile = partitionPathBaseFilePair.getRight();
    BaseFileUtils baseFileUtils = BaseFileUtils.getInstance(baseFile.getPath());
    Path baseFilePath = new Path(baseFile.getPath());
    // assigned exactly once: no placeholder list is allocated only to be thrown away
    final List<HoodieKey> hoodieKeys = keyGeneratorOpt.isPresent()
        ? baseFileUtils.fetchHoodieKeys(hoodieTable.getHadoopConf(), baseFilePath, keyGeneratorOpt)
        : baseFileUtils.fetchHoodieKeys(hoodieTable.getHadoopConf(), baseFilePath);
    return hoodieKeys.stream()
        .map(key -> Pair.of(key, new HoodieRecordLocation(baseFile.getCommitTime(), baseFile.getFileId())));
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieKey(org.apache.hudi.common.model.HoodieKey) ArrayList(java.util.ArrayList) BaseFileUtils(org.apache.hudi.common.util.BaseFileUtils) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation)

Aggregations

HoodieRecordLocation (org.apache.hudi.common.model.HoodieRecordLocation): 43, ArrayList (java.util.ArrayList): 18, HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 17, HashMap (java.util.HashMap): 16, List (java.util.List): 16, HoodieKey (org.apache.hudi.common.model.HoodieKey): 16, Map (java.util.Map): 13, Pair (org.apache.hudi.common.util.collection.Pair): 12, HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 9, Option (org.apache.hudi.common.util.Option): 9, IOException (java.io.IOException): 8, WorkloadStat (org.apache.hudi.table.WorkloadStat): 8, SmallFile (org.apache.hudi.table.action.commit.SmallFile): 8, Tuple2 (scala.Tuple2): 8, HoodieRecordPayload (org.apache.hudi.common.model.HoodieRecordPayload): 7, HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 7, HoodieTable (org.apache.hudi.table.HoodieTable): 7, LogManager (org.apache.log4j.LogManager): 7, Logger (org.apache.log4j.Logger): 7, Collectors (java.util.stream.Collectors): 6