
Example 6 with HoodieUpsertException

Use of org.apache.hudi.exception.HoodieUpsertException in project hudi by apache.

From class HoodieAppendHandle, method init.

private void init(HoodieRecord record) {
    if (doInit) {
        // extract some information from the first record
        SliceView rtView = hoodieTable.getSliceView();
        Option<FileSlice> fileSlice = rtView.getLatestFileSlice(partitionPath, fileId);
        // Set the base commit time as the current instantTime for new inserts into log files
        String baseInstantTime;
        String baseFile = "";
        List<String> logFiles = new ArrayList<>();
        if (fileSlice.isPresent()) {
            baseInstantTime = fileSlice.get().getBaseInstantTime();
            baseFile = fileSlice.get().getBaseFile().map(BaseFile::getFileName).orElse("");
            logFiles = fileSlice.get().getLogFiles().map(HoodieLogFile::getFileName).collect(Collectors.toList());
        } else {
            baseInstantTime = instantTime;
            // This means there is no base data file, start appending to a new log file
            fileSlice = Option.of(new FileSlice(partitionPath, baseInstantTime, this.fileId));
            LOG.info("New AppendHandle for partition :" + partitionPath);
        }
        // Prepare the first write status
        writeStatus.setStat(new HoodieDeltaWriteStat());
        writeStatus.setFileId(fileId);
        writeStatus.setPartitionPath(partitionPath);
        averageRecordSize = sizeEstimator.sizeEstimate(record);
        HoodieDeltaWriteStat deltaWriteStat = (HoodieDeltaWriteStat) writeStatus.getStat();
        deltaWriteStat.setPrevCommit(baseInstantTime);
        deltaWriteStat.setPartitionPath(partitionPath);
        deltaWriteStat.setFileId(fileId);
        deltaWriteStat.setBaseFile(baseFile);
        deltaWriteStat.setLogFiles(logFiles);
        try {
            // Save hoodie partition meta in the partition path
            HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, baseInstantTime, new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
            partitionMetadata.trySave(getPartitionId());
            // Since the actual log file written to can be different based on when rollover happens, we use the
            // base file to denote some log appends happened on a slice. writeToken will still fence concurrent
            // writers.
            // https://issues.apache.org/jira/browse/HUDI-1517
            createMarkerFile(partitionPath, FSUtils.makeDataFileName(baseInstantTime, writeToken, fileId, hoodieTable.getBaseFileExtension()));
            this.writer = createLogWriter(fileSlice, baseInstantTime);
        } catch (Exception e) {
            LOG.error("Error in update task at commit " + instantTime, e);
            writeStatus.setGlobalError(e);
            throw new HoodieUpsertException("Failed to initialize HoodieAppendHandle for FileId: " + fileId + " on commit " + instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePath() + "/" + partitionPath, e);
        }
        doInit = false;
    }
}
Also used: Path(org.apache.hadoop.fs.Path) FileSlice(org.apache.hudi.common.model.FileSlice) ArrayList(java.util.ArrayList) HoodiePartitionMetadata(org.apache.hudi.common.model.HoodiePartitionMetadata) HoodieException(org.apache.hudi.exception.HoodieException) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) HoodieAppendException(org.apache.hudi.exception.HoodieAppendException) IOException(java.io.IOException) SliceView(org.apache.hudi.common.table.view.TableFileSystemView.SliceView) BaseFile(org.apache.hudi.common.model.BaseFile) HoodieDeltaWriteStat(org.apache.hudi.common.model.HoodieDeltaWriteStat) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile)
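
The key decision in init() above is how the base instant time is chosen: an existing file slice keeps its own base instant, while a missing slice means this is the first write to the file group and the current instant becomes the base. A minimal, self-contained sketch of that selection (simplified names, not Hudi APIs):

import java.util.Optional;

// Sketch of the base-instant fallback in HoodieAppendHandle.init():
// append to an existing slice under its base instant, or start a fresh
// log file keyed by the current instant when no slice exists.
public class BaseInstantSelection {

    static String selectBaseInstant(Optional<String> latestSliceBaseInstant, String currentInstant) {
        // Existing slice: append under its base instant.
        // No slice: first write, so the current instant becomes the base.
        return latestSliceBaseInstant.orElse(currentInstant);
    }

    public static void main(String[] args) {
        System.out.println(selectBaseInstant(Optional.of("20220101120000"), "20220201090000")); // 20220101120000
        System.out.println(selectBaseInstant(Optional.empty(), "20220201090000"));              // 20220201090000
    }
}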

Example 7 with HoodieUpsertException

Use of org.apache.hudi.exception.HoodieUpsertException in project hudi by apache.

From class HoodieMergeHandle, method write.

/**
 * Go through an old record. If a newer version of the record shows up, write the new one to the file.
 */
public void write(GenericRecord oldRecord) {
    String key = KeyGenUtils.getRecordKeyFromGenericRecord(oldRecord, keyGeneratorOpt);
    boolean copyOldRecord = true;
    if (keyToNewRecords.containsKey(key)) {
        // If we have duplicate records that we are updating, then the hoodie record will be deflated after
        // writing the first record. So make a copy of the record to be merged
        HoodieRecord<T> hoodieRecord = keyToNewRecords.get(key).newInstance();
        try {
            Option<IndexedRecord> combinedAvroRecord = hoodieRecord.getData().combineAndGetUpdateValue(oldRecord, useWriterSchema ? tableSchemaWithMetaFields : tableSchema, config.getPayloadConfig().getProps());
            if (combinedAvroRecord.isPresent() && combinedAvroRecord.get().equals(IGNORE_RECORD)) {
                // If it is an IGNORE_RECORD, just copy the old record, and do not update the new record.
                copyOldRecord = true;
            } else if (writeUpdateRecord(hoodieRecord, oldRecord, combinedAvroRecord)) {
                /*
                 * ONLY WHEN 1) we have an update for this key AND 2) we are able to successfully
                 * write the combined new value do we no longer need to copy the old record over.
                 */
                copyOldRecord = false;
            }
            writtenRecordKeys.add(key);
        } catch (Exception e) {
            throw new HoodieUpsertException("Failed to combine/merge new record with old value in storage, for new record {" + keyToNewRecords.get(key) + "}, old value {" + oldRecord + "}", e);
        }
    }
    if (copyOldRecord) {
        // this should work as it is, since this is an existing record
        try {
            fileWriter.writeAvro(key, oldRecord);
        } catch (IOException | RuntimeException e) {
            String errMsg = String.format("Failed to merge old record into new file for key %s from old file %s to new file %s with writerSchema %s", key, getOldFilePath(), newFilePath, writeSchemaWithMetaFields.toString(true));
            LOG.debug("Old record is " + oldRecord);
            throw new HoodieUpsertException(errMsg, e);
        }
        recordsWritten++;
    }
}
Also used: HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) IndexedRecord(org.apache.avro.generic.IndexedRecord) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCorruptedDataException(org.apache.hudi.exception.HoodieCorruptedDataException)
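
The copy-vs-update decision in write() above has three outcomes: pass the old record through untouched, keep it because the payload returned the ignore sentinel, or drop it because the combined value (or a delete) was written successfully. A simplified, self-contained sketch of just that decision; IGNORE and the boolean parameters are stand-ins for illustration, not Hudi's types:

import java.util.Optional;

// Sketch of the copyOldRecord logic in HoodieMergeHandle.write().
public class MergeDecision {

    // Stand-in for Hudi's IGNORE_RECORD sentinel.
    static final String IGNORE = "__IGNORE__";

    /**
     * Returns true when the old record should be copied through unchanged:
     * no update exists for the key, the payload asked to keep the old value,
     * or the attempt to write the combined value failed.
     */
    static boolean copyOldRecord(boolean hasNewVersion, Optional<String> combined, boolean writeSucceeded) {
        if (!hasNewVersion) {
            return true;
        }
        if (combined.isPresent() && combined.get().equals(IGNORE)) {
            return true;
        }
        return !writeSucceeded;
    }

    public static void main(String[] args) {
        System.out.println(copyOldRecord(false, Optional.empty(), false));       // true: untouched key
        System.out.println(copyOldRecord(true, Optional.of(IGNORE), false));     // true: keep old value
        System.out.println(copyOldRecord(true, Optional.of("new-value"), true)); // false: replaced by update
        System.out.println(copyOldRecord(true, Optional.empty(), true));         // false: empty combine acts as a delete
    }
}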

Example 8 with HoodieUpsertException

Use of org.apache.hudi.exception.HoodieUpsertException in project hudi by apache.

From class HoodieMergeHandle, method performMergeDataValidationCheck.

public void performMergeDataValidationCheck(WriteStatus writeStatus) {
    if (!config.isMergeDataValidationCheckEnabled()) {
        return;
    }
    long oldNumWrites = 0;
    try {
        HoodieFileReader reader = HoodieFileReaderFactory.getFileReader(hoodieTable.getHadoopConf(), oldFilePath);
        oldNumWrites = reader.getTotalRecords();
    } catch (IOException e) {
        throw new HoodieUpsertException("Failed to check for merge data validation", e);
    }
    if ((writeStatus.getStat().getNumWrites() + writeStatus.getStat().getNumDeletes()) < oldNumWrites) {
        throw new HoodieCorruptedDataException(String.format("Record write count decreased for file: %s, Partition Path: %s (%s:%d + %d < %s:%d)", writeStatus.getFileId(), writeStatus.getPartitionPath(), instantTime, writeStatus.getStat().getNumWrites(), writeStatus.getStat().getNumDeletes(), FSUtils.getCommitTime(oldFilePath.toString()), oldNumWrites));
    }
}
Also used: HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) HoodieFileReader(org.apache.hudi.io.storage.HoodieFileReader) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCorruptedDataException(org.apache.hudi.exception.HoodieCorruptedDataException)
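
The invariant checked above can be restated in a few lines: after a merge, every record in the old base file must be accounted for in the new one, either as a write or as an explicit delete. A minimal re-statement with simplified names (IllegalStateException stands in for HoodieCorruptedDataException):

// Sketch of the merge-validation invariant: writes + deletes >= old record count.
public class MergeValidation {

    static void validate(long numWrites, long numDeletes, long oldNumWrites) {
        if (numWrites + numDeletes < oldNumWrites) {
            throw new IllegalStateException(String.format(
                "Record write count decreased: %d writes + %d deletes < %d old records",
                numWrites, numDeletes, oldNumWrites));
        }
    }

    public static void main(String[] args) {
        validate(90, 10, 100);  // ok: every old record accounted for
        validate(80, 10, 100);  // throws: 10 records silently lost
    }
}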

Example 9 with HoodieUpsertException

Use of org.apache.hudi.exception.HoodieUpsertException in project hudi by apache.

From class HoodieMergeHandle, method init.

/**
 * Extract old file path, initialize StorageWriter and WriteStatus.
 */
private void init(String fileId, String partitionPath, HoodieBaseFile baseFileToMerge) {
    LOG.info("partitionPath:" + partitionPath + ", fileId to be merged:" + fileId);
    this.baseFileToMerge = baseFileToMerge;
    this.writtenRecordKeys = new HashSet<>();
    writeStatus.setStat(new HoodieWriteStat());
    try {
        String latestValidFilePath = baseFileToMerge.getFileName();
        writeStatus.getStat().setPrevCommit(FSUtils.getCommitTime(latestValidFilePath));
        HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, instantTime, new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
        partitionMetadata.trySave(getPartitionId());
        String newFileName = FSUtils.makeDataFileName(instantTime, writeToken, fileId, hoodieTable.getBaseFileExtension());
        makeOldAndNewFilePaths(partitionPath, latestValidFilePath, newFileName);
        LOG.info(String.format("Merging new data into oldPath %s, as newPath %s", oldFilePath.toString(), newFilePath.toString()));
        // file name is the same for all records in this bunch
        writeStatus.setFileId(fileId);
        writeStatus.setPartitionPath(partitionPath);
        writeStatus.getStat().setPartitionPath(partitionPath);
        writeStatus.getStat().setFileId(fileId);
        setWriteStatusPath();
        // Create Marker file
        createMarkerFile(partitionPath, newFileName);
        // Create the writer for writing the new version file
        fileWriter = createNewFileWriter(instantTime, newFilePath, hoodieTable, config, writeSchemaWithMetaFields, taskContextSupplier);
    } catch (IOException io) {
        LOG.error("Error in update task at commit " + instantTime, io);
        writeStatus.setGlobalError(io);
        throw new HoodieUpsertException("Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit " + instantTime + " on path " + hoodieTable.getMetaClient().getBasePath(), io);
    }
}
Also used: Path(org.apache.hadoop.fs.Path) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) HoodiePartitionMetadata(org.apache.hudi.common.model.HoodiePartitionMetadata) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)
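
init() above recovers the previous commit with FSUtils.getCommitTime(latestValidFilePath), which works because FSUtils.makeDataFileName names base files as fileId_writeToken_instantTime plus the extension. A toy parser illustrating that naming scheme; this is a sketch of the idea, not Hudi's FSUtils implementation, and the sample file name is made up:

// Sketch: recover the commit (instant) time from a Hudi base file name of the
// form <fileId>_<writeToken>_<instantTime>.<ext>.
public class CommitTimeFromFileName {

    static String commitTime(String baseFileName) {
        String stem = baseFileName.substring(0, baseFileName.lastIndexOf('.')); // drop the extension
        String[] parts = stem.split("_");                                       // fileId, writeToken, instantTime
        return parts[2];
    }

    public static void main(String[] args) {
        System.out.println(commitTime(
            "a1b2c3d4-0000-0000-0000-000000000000-0_1-2-3_20220115093000.parquet"));
        // prints 20220115093000
    }
}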

Example 10 with HoodieUpsertException

Use of org.apache.hudi.exception.HoodieUpsertException in project hudi by apache.

From class HoodieSortedMergeHandle, method write.

/**
 * Go through an old record. If a newer version of the record shows up, write the new one to the file.
 */
@Override
public void write(GenericRecord oldRecord) {
    String key = KeyGenUtils.getRecordKeyFromGenericRecord(oldRecord, keyGeneratorOpt);
    // To maintain overall sorted order across inserts and updates, write out any new
    // inserts whose keys sort before (or equal to) the oldRecord's key.
    while (!newRecordKeysSorted.isEmpty() && newRecordKeysSorted.peek().compareTo(key) <= 0) {
        String keyToPreWrite = newRecordKeysSorted.remove();
        if (keyToPreWrite.equals(key)) {
            // will be handled as an update later
            break;
        }
        // This is a new insert
        HoodieRecord<T> hoodieRecord = keyToNewRecords.get(keyToPreWrite).newInstance();
        if (writtenRecordKeys.contains(keyToPreWrite)) {
            throw new HoodieUpsertException("Insert/Update not in sorted order");
        }
        try {
            if (useWriterSchema) {
                writeRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(tableSchemaWithMetaFields, config.getProps()));
            } else {
                writeRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(tableSchema, config.getProps()));
            }
            insertRecordsWritten++;
            writtenRecordKeys.add(keyToPreWrite);
        } catch (IOException e) {
            throw new HoodieUpsertException("Failed to write records", e);
        }
    }
    super.write(oldRecord);
}
Also used: HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) IOException(java.io.IOException)
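
The loop above is the classic merge-sort interleaving: before an old record is copied, every pending new key that sorts strictly before it is flushed as an insert, and an equal key is deferred to the update path. A stripped-down, self-contained sketch with plain strings standing in for HoodieRecords:

import java.util.PriorityQueue;
import java.util.Queue;

// Sketch of the sorted-merge interleaving in HoodieSortedMergeHandle.write().
public class SortedMergeSketch {

    static void onOldKey(Queue<String> newKeysSorted, String oldKey) {
        while (!newKeysSorted.isEmpty() && newKeysSorted.peek().compareTo(oldKey) <= 0) {
            String next = newKeysSorted.remove();
            if (next.equals(oldKey)) {
                System.out.println("update " + next); // handled by the update path below
                break;
            }
            System.out.println("insert " + next);     // brand-new key, write it first
        }
        System.out.println("copy/merge " + oldKey);   // super.write(oldRecord) in the real code
    }

    public static void main(String[] args) {
        Queue<String> pending = new PriorityQueue<>();
        pending.add("b");
        pending.add("d");
        onOldKey(pending, "c"); // insert b, then copy/merge c
        onOldKey(pending, "d"); // update d, then copy/merge d
    }
}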

Aggregations

HoodieUpsertException (org.apache.hudi.exception.HoodieUpsertException): 24 usages
IOException (java.io.IOException): 13 usages
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 6 usages
HoodieIOException (org.apache.hudi.exception.HoodieIOException): 6 usages
Duration (java.time.Duration): 5 usages
Instant (java.time.Instant): 5 usages
List (java.util.List): 4 usages
WriteStatus (org.apache.hudi.client.WriteStatus): 4 usages
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 4 usages
HashMap (java.util.HashMap): 3 usages
LinkedList (java.util.LinkedList): 3 usages
IndexedRecord (org.apache.avro.generic.IndexedRecord): 3 usages
Path (org.apache.hadoop.fs.Path): 3 usages
HoodieList (org.apache.hudi.common.data.HoodieList): 3 usages
EmptyHoodieRecordPayload (org.apache.hudi.common.model.EmptyHoodieRecordPayload): 3 usages
HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord): 3 usages
HoodieKey (org.apache.hudi.common.model.HoodieKey): 3 usages
Pair (org.apache.hudi.common.util.collection.Pair): 3 usages
HoodieCorruptedDataException (org.apache.hudi.exception.HoodieCorruptedDataException): 3 usages
HoodieInsertException (org.apache.hudi.exception.HoodieInsertException): 3 usages