Search in sources :

Example 1 with CorruptedLogFileException

use of org.apache.hudi.exception.CorruptedLogFileException in project hudi by apache.

the class HoodieLogFileReader method prev.

/**
 * Reads the block that ends at the current reverse position and moves the reverse cursor back past it.
 *
 * <p>This is a reverse iterator. Note: at any point, an instance of HoodieLogFileReader should either
 * iterate in reverse (prev) or forward (next); doing both in the same instance is not supported.
 * WARNING: every call to prev() should be preceded by a call to hasPrev().
 *
 * @return the block returned by next() once the stream has been positioned at the start of the previous block
 * @throws IOException if reading from, or repositioning, the underlying stream fails
 * @throws HoodieNotSupportedException if the reverse reader has not been enabled on this instance
 * @throws CorruptedLogFileException if the stream cannot seek to the computed start of the previous block;
 *         the original seek failure is attached as a suppressed exception
 */
@Override
public HoodieLogBlock prev() throws IOException {
    if (!this.reverseReader) {
        throw new HoodieNotSupportedException("Reverse log reader has not been enabled");
    }
    long blockSize = inputStream.readLong();
    // Remember where the stream sits after the size field so it can be restored on failure.
    long blockEndPos = inputStream.getPos();
    // blocksize should read everything about a block including the length as well
    try {
        inputStream.seek(reverseLogFilePosition - blockSize);
    } catch (Exception e) {
        // this could be a corrupt block: put the stream back where it was before signalling the caller
        inputStream.seek(blockEndPos);
        CorruptedLogFileException corrupted = new CorruptedLogFileException("Found possible corrupted block, cannot read log file in reverse, " + "fallback to forward reading of logfile");
        // Keep the underlying seek failure for diagnostics. addSuppressed is used because only the
        // message-only constructor of CorruptedLogFileException is known to exist from this method.
        corrupted.addSuppressed(e);
        throw corrupted;
    }
    // The return value was never consulted here; the call itself is kept because hasNext() may advance
    // reader state that next() relies on. NOTE(review): confirm whether a false result should be handled.
    hasNext();
    reverseLogFilePosition -= blockSize;
    lastReverseLogFilePosition = reverseLogFilePosition;
    return next();
}
Also used : CorruptedLogFileException(org.apache.hudi.exception.CorruptedLogFileException) HoodieNotSupportedException(org.apache.hudi.exception.HoodieNotSupportedException) IOException(java.io.IOException) EOFException(java.io.EOFException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Example 2 with CorruptedLogFileException

use of org.apache.hudi.exception.CorruptedLogFileException in project hudi by apache.

the class HoodieLogFileReader method readBlock.

// TODO : convert content and block length to long by using ByteBuffer, raw byte [] allows
// for max of Integer size
/**
 * Reads one log block from the current position of the input stream.
 *
 * <p>The fields are consumed strictly in on-disk order: total block size, format version, block type,
 * optional header, content length, content, optional footer, optional trailing block length. If the
 * size cannot be read, or the block fails the corruption check, a corrupt block is returned instead.
 *
 * @return the decoded block, or the result of createCorruptBlock() when the block looks corrupted
 * @throws IOException if reading from the underlying stream fails
 * @throws HoodieNotSupportedException if the block type is not one of the handled types
 */
private HoodieLogBlock readBlock() throws IOException {
    // 1. Total size of the block. The value is narrowed to int (see TODO above).
    int totalBlockSize;
    try {
        totalBlockSize = (int) inputStream.readLong();
    } catch (EOFException | CorruptedLogFileException e) {
        // Could not even read the size: build a corrupt block by finding the next MAGIC marker or EOF
        return createCorruptBlock();
    }

    // A crash may have left this block partially written. After skipping totalBlockSize bytes we
    // should see either a sync marker (start of the next block) or EOF; otherwise the block is corrupted.
    if (isBlockCorrupted(totalBlockSize)) {
        return createCorruptBlock();
    }

    // 2. Log format version
    HoodieLogFormat.LogFormatVersion formatVersion = readVersion();

    // 3. Block type
    HoodieLogBlockType type = tryReadBlockType(formatVersion);

    // 4. Header, only when this format version carries one
    Map<HeaderMetadataType, String> headerMetadata = null;
    if (formatVersion.hasHeader()) {
        headerMetadata = HoodieLogBlock.getLogMetadata(inputStream);
    }

    // 5. Content length; the default version has no such field, so the full block size is used
    // TODO replace w/ hasContentLength
    boolean isDefaultVersion = formatVersion.getVersion() == HoodieLogFormatVersion.DEFAULT_VERSION;
    int payloadLength;
    if (isDefaultVersion) {
        payloadLength = totalBlockSize;
    } else {
        payloadLength = (int) inputStream.readLong();
    }

    // 6. Read the content now, or skip it, based on the IO vs memory trade-off chosen by the client.
    //    Lazy reading is never applied to default-version blocks.
    long payloadStart = inputStream.getPos();
    boolean lazyRead = readBlockLazily && !isDefaultVersion;
    Option<byte[]> payload = HoodieLogBlock.tryReadContent(inputStream, payloadLength, lazyRead);

    // 7. Footer, only when this format version carries one
    Map<HeaderMetadataType, String> footerMetadata = null;
    if (formatVersion.hasFooter()) {
        footerMetadata = HoodieLogBlock.getLogMetadata(inputStream);
    }

    // 8. Consume the trailing log block length, if present; the value is discarded here. It is used
    //    when traversing the log file in reverse.
    if (formatVersion.hasLogBlockLength()) {
        inputStream.readLong();
    }

    // 9. Position in the log file where this block ends
    long endOfBlock = inputStream.getPos();
    HoodieLogBlock.HoodieLogBlockContentLocation contentLocation =
        new HoodieLogBlock.HoodieLogBlockContentLocation(hadoopConf, logFile, payloadStart, payloadLength, endOfBlock);

    // Build the concrete block for the decoded type; a null type fails fast in requireNonNull.
    switch (Objects.requireNonNull(type)) {
        case AVRO_DATA_BLOCK:
            if (isDefaultVersion) {
                return HoodieAvroDataBlock.getBlock(payload.get(), readerSchema);
            }
            return new HoodieAvroDataBlock(inputStream, payload, readBlockLazily, contentLocation, Option.ofNullable(readerSchema), headerMetadata, footerMetadata, keyField);
        case HFILE_DATA_BLOCK:
            checkState(!isDefaultVersion, String.format("HFile block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION));
            return new HoodieHFileDataBlock(inputStream, payload, readBlockLazily, contentLocation, Option.ofNullable(readerSchema), headerMetadata, footerMetadata, enableRecordLookups);
        case PARQUET_DATA_BLOCK:
            checkState(!isDefaultVersion, String.format("Parquet block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION));
            return new HoodieParquetDataBlock(inputStream, payload, readBlockLazily, contentLocation, Option.ofNullable(readerSchema), headerMetadata, footerMetadata, keyField);
        case DELETE_BLOCK:
            return new HoodieDeleteBlock(payload, inputStream, readBlockLazily, Option.of(contentLocation), headerMetadata, footerMetadata);
        case COMMAND_BLOCK:
            return new HoodieCommandBlock(payload, inputStream, readBlockLazily, Option.of(contentLocation), headerMetadata, footerMetadata);
        default:
            throw new HoodieNotSupportedException("Unsupported Block " + type);
    }
}
Also used : HoodieDeleteBlock(org.apache.hudi.common.table.log.block.HoodieDeleteBlock) CorruptedLogFileException(org.apache.hudi.exception.CorruptedLogFileException) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieLogBlockType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType) HoodieNotSupportedException(org.apache.hudi.exception.HoodieNotSupportedException) HoodieHFileDataBlock(org.apache.hudi.common.table.log.block.HoodieHFileDataBlock) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) HoodieParquetDataBlock(org.apache.hudi.common.table.log.block.HoodieParquetDataBlock) HeaderMetadataType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType) EOFException(java.io.EOFException)

Aggregations

EOFException (java.io.EOFException)2 CorruptedLogFileException (org.apache.hudi.exception.CorruptedLogFileException)2 HoodieNotSupportedException (org.apache.hudi.exception.HoodieNotSupportedException)2 IOException (java.io.IOException)1 HoodieAvroDataBlock (org.apache.hudi.common.table.log.block.HoodieAvroDataBlock)1 HoodieCommandBlock (org.apache.hudi.common.table.log.block.HoodieCommandBlock)1 HoodieDeleteBlock (org.apache.hudi.common.table.log.block.HoodieDeleteBlock)1 HoodieHFileDataBlock (org.apache.hudi.common.table.log.block.HoodieHFileDataBlock)1 HoodieLogBlock (org.apache.hudi.common.table.log.block.HoodieLogBlock)1 HeaderMetadataType (org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType)1 HoodieLogBlockType (org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType)1 HoodieParquetDataBlock (org.apache.hudi.common.table.log.block.HoodieParquetDataBlock)1 HoodieIOException (org.apache.hudi.exception.HoodieIOException)1