
Example 1 with HoodieNotSupportedException

Use of org.apache.hudi.exception.HoodieNotSupportedException in project hudi by apache.

From class KafkaOffsetGen, method getNextOffsetRanges:

public OffsetRange[] getNextOffsetRanges(Option<String> lastCheckpointStr, long sourceLimit, HoodieDeltaStreamerMetrics metrics) {
    // Obtain current metadata for the topic
    Map<TopicPartition, Long> fromOffsets;
    Map<TopicPartition, Long> toOffsets;
    try (KafkaConsumer consumer = new KafkaConsumer(kafkaParams)) {
        if (!checkTopicExists(consumer)) {
            throw new HoodieException("Kafka topic:" + topicName + " does not exist");
        }
        List<PartitionInfo> partitionInfoList;
        partitionInfoList = consumer.partitionsFor(topicName);
        Set<TopicPartition> topicPartitions = partitionInfoList.stream().map(x -> new TopicPartition(x.topic(), x.partition())).collect(Collectors.toSet());
        if (Config.KAFKA_CHECKPOINT_TYPE_TIMESTAMP.equals(kafkaCheckpointType) && isValidTimestampCheckpointType(lastCheckpointStr)) {
            lastCheckpointStr = getOffsetsByTimestamp(consumer, partitionInfoList, topicPartitions, topicName, Long.parseLong(lastCheckpointStr.get()));
        }
        // Determine the offset ranges to read from
        if (lastCheckpointStr.isPresent() && !lastCheckpointStr.get().isEmpty() && checkTopicCheckpoint(lastCheckpointStr)) {
            fromOffsets = fetchValidOffsets(consumer, lastCheckpointStr, topicPartitions);
            metrics.updateDeltaStreamerKafkaDelayCountMetrics(delayOffsetCalculation(lastCheckpointStr, topicPartitions, consumer));
        } else {
            switch(autoResetValue) {
                case EARLIEST:
                    fromOffsets = consumer.beginningOffsets(topicPartitions);
                    break;
                case LATEST:
                    fromOffsets = consumer.endOffsets(topicPartitions);
                    break;
                case GROUP:
                    fromOffsets = getGroupOffsets(consumer, topicPartitions);
                    break;
                default:
                    throw new HoodieNotSupportedException("Auto reset value must be one of 'earliest' or 'latest' or 'group' ");
            }
        }
        // Obtain the latest offsets.
        toOffsets = consumer.endOffsets(topicPartitions);
    }
    // Come up with final set of OffsetRanges to read (account for new partitions, limit number of events)
    long maxEventsToReadFromKafka = props.getLong(Config.MAX_EVENTS_FROM_KAFKA_SOURCE_PROP.key(), Config.MAX_EVENTS_FROM_KAFKA_SOURCE_PROP.defaultValue());
    long numEvents;
    if (sourceLimit == Long.MAX_VALUE) {
        numEvents = maxEventsToReadFromKafka;
        LOG.info("SourceLimit not configured, set numEvents to default value : " + maxEventsToReadFromKafka);
    } else {
        numEvents = sourceLimit;
    }
    if (numEvents < toOffsets.size()) {
        throw new HoodieException("sourceLimit should not be less than the number of kafka partitions");
    }
    return CheckpointUtils.computeOffsetRanges(fromOffsets, toOffsets, numEvents);
}
Also used : Arrays(java.util.Arrays) HoodieException(org.apache.hudi.exception.HoodieException) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) DataSourceUtils(org.apache.hudi.DataSourceUtils) HoodieDeltaStreamerMetrics(org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics) Function(java.util.function.Function) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) Matcher(java.util.regex.Matcher) OffsetRange(org.apache.spark.streaming.kafka010.OffsetRange) Map(java.util.Map) AvroKafkaSource(org.apache.hudi.utilities.sources.AvroKafkaSource) HoodieDeltaStreamerException(org.apache.hudi.utilities.exception.HoodieDeltaStreamerException) HoodieNotSupportedException(org.apache.hudi.exception.HoodieNotSupportedException) TopicPartition(org.apache.kafka.common.TopicPartition) TimeoutException(org.apache.kafka.common.errors.TimeoutException) TypedProperties(org.apache.hudi.common.config.TypedProperties) Set(java.util.Set) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) PartitionInfo(org.apache.kafka.common.PartitionInfo) OffsetAndTimestamp(org.apache.kafka.clients.consumer.OffsetAndTimestamp) Collectors(java.util.stream.Collectors) List(java.util.List) ConfigProperty(org.apache.hudi.common.config.ConfigProperty) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) CommitFailedException(org.apache.kafka.clients.consumer.CommitFailedException) LogManager(org.apache.log4j.LogManager) Pattern(java.util.regex.Pattern) Comparator(java.util.Comparator) Collections(java.util.Collections) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer)
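
The switch above only chooses the starting offsets; the per-partition ranges are then sized by CheckpointUtils.computeOffsetRanges from fromOffsets, toOffsets, and the event budget. The following is a minimal, self-contained sketch of that idea, assuming an even split of the budget across partitions capped at each partition's latest offset; the class name, topic names, and numbers are made up, and this is not Hudi's actual allocation logic.

import java.util.LinkedHashMap;
import java.util.Map;

// Illustrative only: split a total event budget evenly across partitions and cap
// each range at the partition's latest offset. Hudi's CheckpointUtils has its own
// allocation rules, which are not reproduced here.
public class OffsetRangeSketch {

    public static Map<String, long[]> computeRanges(Map<String, Long> fromOffsets,
                                                    Map<String, Long> toOffsets,
                                                    long numEvents) {
        long perPartition = Math.max(1, numEvents / fromOffsets.size());
        Map<String, long[]> ranges = new LinkedHashMap<>();
        for (Map.Entry<String, Long> e : fromOffsets.entrySet()) {
            long from = e.getValue();
            long latest = toOffsets.get(e.getKey());
            long until = Math.min(latest, from + perPartition);
            ranges.put(e.getKey(), new long[] { from, until });
        }
        return ranges;
    }

    public static void main(String[] args) {
        Map<String, Long> from = Map.of("impressions-0", 100L, "impressions-1", 250L);
        Map<String, Long> to = Map.of("impressions-0", 5000L, "impressions-1", 300L);
        // Partition 0 gets its full 500-event share; partition 1 is capped at its latest offset.
        computeRanges(from, to, 1000).forEach((tp, r) ->
                System.out.println(tp + " -> [" + r[0] + ", " + r[1] + ")"));
    }
}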

Example 2 with HoodieNotSupportedException

Use of org.apache.hudi.exception.HoodieNotSupportedException in project hudi by apache.

From class TimestampBasedAvroKeyGenerator, method getPartitionPath:

/**
 * Parse and fetch partition path based on data type.
 *
 * @param partitionVal partition path object value fetched from record/row
 * @return the parsed partition path based on data type
 */
public String getPartitionPath(Object partitionVal) {
    initIfNeeded();
    long timeMs;
    if (partitionVal instanceof Double) {
        timeMs = convertLongTimeToMillis(((Double) partitionVal).longValue());
    } else if (partitionVal instanceof Float) {
        timeMs = convertLongTimeToMillis(((Float) partitionVal).longValue());
    } else if (partitionVal instanceof Long) {
        timeMs = convertLongTimeToMillis((Long) partitionVal);
    } else if (partitionVal instanceof Timestamp && isConsistentLogicalTimestampEnabled()) {
        timeMs = ((Timestamp) partitionVal).getTime();
    } else if (partitionVal instanceof Integer) {
        timeMs = convertLongTimeToMillis(((Integer) partitionVal).longValue());
    } else if (partitionVal instanceof BigDecimal) {
        timeMs = convertLongTimeToMillis(((BigDecimal) partitionVal).longValue());
    } else if (partitionVal instanceof CharSequence) {
        if (!inputFormatter.isPresent()) {
            throw new HoodieException("Missing inputformatter. Ensure " + KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " config is set when timestampType is DATE_STRING or MIXED!");
        }
        DateTime parsedDateTime = inputFormatter.get().parseDateTime(partitionVal.toString());
        if (this.outputDateTimeZone == null) {
            // Use the timezone that came off the date that was passed in, if it had one
            partitionFormatter = partitionFormatter.withZone(parsedDateTime.getZone());
        }
        timeMs = inputFormatter.get().parseDateTime(partitionVal.toString()).getMillis();
    } else {
        throw new HoodieNotSupportedException("Unexpected type for partition field: " + partitionVal.getClass().getName());
    }
    DateTime timestamp = new DateTime(timeMs, outputDateTimeZone);
    String partitionPath = timestamp.toString(partitionFormatter);
    if (encodePartitionPath) {
        partitionPath = PartitionPathEncodeUtils.escapePathName(partitionPath);
    }
    return hiveStylePartitioning ? getPartitionPathFields().get(0) + "=" + partitionPath : partitionPath;
}
Also used : HoodieException(org.apache.hudi.exception.HoodieException) HoodieNotSupportedException(org.apache.hudi.exception.HoodieNotSupportedException) Timestamp(java.sql.Timestamp) BigDecimal(java.math.BigDecimal) DateTime(org.joda.time.DateTime)
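
The CharSequence branch and the final formatting both go through Joda-Time. Below is a minimal sketch of that last step only: render an epoch value with an output formatter and optionally prefix the field name for hive-style partitioning. The input unit (epoch seconds), pattern, and field name are assumptions for the demo; in the real key generator they come from configuration, and this is not the actual TimestampBasedAvroKeyGenerator code.

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

// Illustrative only: turn a scalar partition value into a date-based partition path.
public class PartitionPathSketch {

    public static void main(String[] args) {
        long epochSeconds = 1640995200L;             // 2022-01-01T00:00:00Z (assumed input unit)
        long timeMs = epochSeconds * 1000L;          // same role as convertLongTimeToMillis above

        DateTimeFormatter partitionFormatter =
                DateTimeFormat.forPattern("yyyy/MM/dd").withZone(DateTimeZone.UTC);

        String partitionPath = new DateTime(timeMs, DateTimeZone.UTC).toString(partitionFormatter);

        boolean hiveStylePartitioning = true;        // assumed for the demo
        String partitionField = "event_ts";          // hypothetical partition field name
        System.out.println(hiveStylePartitioning
                ? partitionField + "=" + partitionPath
                : partitionPath);
        // prints: event_ts=2022/01/01
    }
}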

Example 3 with HoodieNotSupportedException

Use of org.apache.hudi.exception.HoodieNotSupportedException in project hudi by apache.

From class HoodieLogFileReader, method prev:

/**
 * This is a reverse iterator.
 * Note: At any point, an instance of HoodieLogFileReader should either iterate reverse (prev) or forward (next).
 * Doing both in the same instance is not supported.
 * WARNING: Every call to prev() should be preceded with hasPrev().
 */
@Override
public HoodieLogBlock prev() throws IOException {
    if (!this.reverseReader) {
        throw new HoodieNotSupportedException("Reverse log reader has not been enabled");
    }
    long blockSize = inputStream.readLong();
    long blockEndPos = inputStream.getPos();
    // blocksize should read everything about a block including the length as well
    try {
        inputStream.seek(reverseLogFilePosition - blockSize);
    } catch (Exception e) {
        // this could be a corrupt block
        inputStream.seek(blockEndPos);
        throw new CorruptedLogFileException("Found possible corrupted block, cannot read log file in reverse, " + "fallback to forward reading of logfile");
    }
    boolean hasNext = hasNext();
    reverseLogFilePosition -= blockSize;
    lastReverseLogFilePosition = reverseLogFilePosition;
    return next();
}
Also used : CorruptedLogFileException(org.apache.hudi.exception.CorruptedLogFileException) HoodieNotSupportedException(org.apache.hudi.exception.HoodieNotSupportedException) IOException(java.io.IOException) EOFException(java.io.EOFException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)
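
The reverse read works because each log block is followed by its length, so a reader positioned just past that field can read the length and seek back to the start of the block. The sketch below shows the same length-suffix trick against an invented file layout (payload followed by an 8-byte length) using RandomAccessFile; the file name and layout are assumptions and do not match the real Hudi log format.

import java.io.IOException;
import java.io.RandomAccessFile;

// Illustrative only: walk a file of length-suffixed blocks from the end to the start.
public class ReverseBlockScanSketch {

    public static void main(String[] args) throws IOException {
        try (RandomAccessFile file = new RandomAccessFile("blocks.bin", "r")) {
            long pos = file.length();
            while (pos > Long.BYTES) {
                file.seek(pos - Long.BYTES);          // position on the trailing length field
                long blockSize = file.readLong();     // size of the block that precedes it
                long blockStart = pos - Long.BYTES - blockSize;
                if (blockStart < 0) {
                    System.out.println("Implausible block size, stopping reverse scan");
                    break;
                }
                System.out.println("Block of " + blockSize + " bytes starts at offset " + blockStart);
                pos = blockStart;                     // jump to the previous block
            }
        }
    }
}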

Example 4 with HoodieNotSupportedException

Use of org.apache.hudi.exception.HoodieNotSupportedException in project hudi by apache.

From class HoodieLogFileReader, method readBlock:

// TODO : convert content and block length to long by using ByteBuffer, raw byte [] allows
// for max of Integer size
private HoodieLogBlock readBlock() throws IOException {
    int blockSize;
    try {
        // 1 Read the total size of the block
        blockSize = (int) inputStream.readLong();
    } catch (EOFException | CorruptedLogFileException e) {
        // Create a corrupt block by finding the next MAGIC marker or EOF
        return createCorruptBlock();
    }
    // We may have had a crash which could have written this block partially
    // Skip blockSize in the stream and we should either find a sync marker (start of the next
    // block) or EOF. If we did not find either of it, then this block is a corrupted block.
    boolean isCorrupted = isBlockCorrupted(blockSize);
    if (isCorrupted) {
        return createCorruptBlock();
    }
    // 2. Read the version for this log format
    HoodieLogFormat.LogFormatVersion nextBlockVersion = readVersion();
    // 3. Read the block type for a log block
    HoodieLogBlockType blockType = tryReadBlockType(nextBlockVersion);
    // 4. Read the header for a log block, if present
    Map<HeaderMetadataType, String> header = nextBlockVersion.hasHeader() ? HoodieLogBlock.getLogMetadata(inputStream) : null;
    // 5. Read the content length for the content
    // Fallback to full-block size if no content-length
    // TODO replace w/ hasContentLength
    int contentLength = nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION ? (int) inputStream.readLong() : blockSize;
    // 6. Read the content or skip content based on IO vs Memory trade-off by client
    long contentPosition = inputStream.getPos();
    boolean shouldReadLazily = readBlockLazily && nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION;
    Option<byte[]> content = HoodieLogBlock.tryReadContent(inputStream, contentLength, shouldReadLazily);
    // 7. Read footer if any
    Map<HeaderMetadataType, String> footer = nextBlockVersion.hasFooter() ? HoodieLogBlock.getLogMetadata(inputStream) : null;
    // 8. Read the log block length, if present; this acts as a reverse pointer when traversing the
    // log file in reverse
    if (nextBlockVersion.hasLogBlockLength()) {
        inputStream.readLong();
    }
    // 9. Read the log block end position in the log file
    long blockEndPos = inputStream.getPos();
    HoodieLogBlock.HoodieLogBlockContentLocation logBlockContentLoc = new HoodieLogBlock.HoodieLogBlockContentLocation(hadoopConf, logFile, contentPosition, contentLength, blockEndPos);
    switch(Objects.requireNonNull(blockType)) {
        case AVRO_DATA_BLOCK:
            if (nextBlockVersion.getVersion() == HoodieLogFormatVersion.DEFAULT_VERSION) {
                return HoodieAvroDataBlock.getBlock(content.get(), readerSchema);
            } else {
                return new HoodieAvroDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, Option.ofNullable(readerSchema), header, footer, keyField);
            }
        case HFILE_DATA_BLOCK:
            checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, String.format("HFile block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION));
            return new HoodieHFileDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, Option.ofNullable(readerSchema), header, footer, enableRecordLookups);
        case PARQUET_DATA_BLOCK:
            checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, String.format("Parquet block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION));
            return new HoodieParquetDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, Option.ofNullable(readerSchema), header, footer, keyField);
        case DELETE_BLOCK:
            return new HoodieDeleteBlock(content, inputStream, readBlockLazily, Option.of(logBlockContentLoc), header, footer);
        case COMMAND_BLOCK:
            return new HoodieCommandBlock(content, inputStream, readBlockLazily, Option.of(logBlockContentLoc), header, footer);
        default:
            throw new HoodieNotSupportedException("Unsupported Block " + blockType);
    }
}
Also used : HoodieDeleteBlock(org.apache.hudi.common.table.log.block.HoodieDeleteBlock) CorruptedLogFileException(org.apache.hudi.exception.CorruptedLogFileException) HoodieAvroDataBlock(org.apache.hudi.common.table.log.block.HoodieAvroDataBlock) HoodieLogBlockType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType) HoodieNotSupportedException(org.apache.hudi.exception.HoodieNotSupportedException) HoodieHFileDataBlock(org.apache.hudi.common.table.log.block.HoodieHFileDataBlock) HoodieCommandBlock(org.apache.hudi.common.table.log.block.HoodieCommandBlock) HoodieLogBlock(org.apache.hudi.common.table.log.block.HoodieLogBlock) HoodieParquetDataBlock(org.apache.hudi.common.table.log.block.HoodieParquetDataBlock) HeaderMetadataType(org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType) EOFException(java.io.EOFException)
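
readBlock follows a fixed sequence: declared size, format version, block type, optional header, content, optional footer, then a dispatch on the block type, with corruption checks up front. The sketch below applies the same read-size, sanity-check, dispatch pattern to a deliberately simplified, invented layout (8-byte size, 4-byte type code, raw payload); it is not the Hudi log block format.

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;

// Illustrative only: parse one block of a hypothetical length-prefixed format.
public class BlockParserSketch {

    static final int TYPE_DATA = 1;
    static final int TYPE_DELETE = 2;

    static void readBlock(DataInputStream in, int bytesAvailable) throws IOException {
        long blockSize;
        try {
            blockSize = in.readLong();                // 1. declared payload size
        } catch (EOFException e) {
            System.out.println("Truncated block, treating as corrupt");
            return;
        }
        if (blockSize < 0 || blockSize > bytesAvailable) {
            System.out.println("Implausible size " + blockSize + ", treating as corrupt");
            return;
        }
        int type = in.readInt();                      // 2. block type code
        byte[] content = new byte[(int) blockSize];   // 3. payload
        in.readFully(content);
        switch (type) {                               // 4. dispatch on block type
            case TYPE_DATA:
                System.out.println("Data block, " + content.length + " payload bytes");
                break;
            case TYPE_DELETE:
                System.out.println("Delete block, " + content.length + " payload bytes");
                break;
            default:
                throw new IOException("Unsupported block type " + type);
        }
    }

    public static void main(String[] args) throws IOException {
        // One well-formed data block: size=3, type=TYPE_DATA, payload {1, 2, 3}.
        byte[] bytes = new byte[] { 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 1, 2, 3 };
        readBlock(new DataInputStream(new ByteArrayInputStream(bytes)), bytes.length);
    }
}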

Aggregations

HoodieNotSupportedException (org.apache.hudi.exception.HoodieNotSupportedException): 4
EOFException (java.io.EOFException): 2
CorruptedLogFileException (org.apache.hudi.exception.CorruptedLogFileException): 2
HoodieException (org.apache.hudi.exception.HoodieException): 2
IOException (java.io.IOException): 1
BigDecimal (java.math.BigDecimal): 1
Timestamp (java.sql.Timestamp): 1
Arrays (java.util.Arrays): 1
Collections (java.util.Collections): 1
Comparator (java.util.Comparator): 1
HashMap (java.util.HashMap): 1
HashSet (java.util.HashSet): 1
List (java.util.List): 1
Map (java.util.Map): 1
Set (java.util.Set): 1
Function (java.util.function.Function): 1
Matcher (java.util.regex.Matcher): 1
Pattern (java.util.regex.Pattern): 1
Collectors (java.util.stream.Collectors): 1
DataSourceUtils (org.apache.hudi.DataSourceUtils): 1