
Example 1 with SegmentNotWritableException

Use of org.apache.druid.segment.realtime.appenderator.SegmentNotWritableException in project hive by apache.

From the class DruidRecordWriter, the write method:

@Override
public void write(Writable w) throws IOException {
    DruidWritable record = (DruidWritable) w;
    final long timestamp = (long) record.getValue().get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN);
    final int partitionNumber = Math.toIntExact((long) record.getValue().getOrDefault(Constants.DRUID_SHARD_KEY_COL_NAME, -1L));
    final InputRow inputRow = new MapBasedInputRow(timestamp, dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(), record.getValue());
    try {
        if (partitionNumber != -1 && maxPartitionSize == -1) {
            /*
             * Data is sorted by time plus an extra hashing dimension (see DRUID_SHARD_KEY_COL_NAME),
             * so DRUID_SHARD_KEY_COL_NAME is used as the segment partition in addition to the time dimension.
             * Rows with the same DRUID_SHARD_KEY_COL_NAME and time interval end up in the same segment.
             */
            DateTime truncatedDateTime = segmentGranularity.bucketStart(DateTimes.utc(timestamp));
            final Interval interval = new Interval(truncatedDateTime, segmentGranularity.increment(truncatedDateTime));
            if (currentOpenSegment != null) {
                if (currentOpenSegment.getShardSpec().getPartitionNum() != partitionNumber || !currentOpenSegment.getInterval().equals(interval)) {
                    pushSegments(ImmutableList.of(currentOpenSegment));
                    currentOpenSegment = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval, tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(partitionNumber));
                }
            } else {
                currentOpenSegment = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval, tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(partitionNumber));
            }
            appenderator.add(currentOpenSegment, inputRow, committerSupplier::get);
        } else if (partitionNumber == -1 && maxPartitionSize != -1) {
            /* Case: partition segments by time and a maximum number of rows per segment (maxPartitionSize); see the sketch after this method. */
            appenderator.add(getSegmentIdentifierAndMaybePush(timestamp), inputRow, committerSupplier::get);
        } else {
            throw new IllegalArgumentException(String.format("partitionNumber and maxPartitionSize should be mutually exclusive " + "got partitionNum [%s] and maxPartitionSize [%s]", partitionNumber, maxPartitionSize));
        }
    } catch (SegmentNotWritableException e) {
        throw new IOException(e);
    }
}
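
The time-and-size branch above delegates to getSegmentIdentifierAndMaybePush, which is not included in this snippet. Below is a minimal sketch of what such a helper could look like, assuming the appenderator exposes a per-segment row count via Appenderator.getRowCount and that currentOpenSegment, maxPartitionSize, segmentGranularity, tuningConfig, dataSchema and pushSegments are the writer's own fields and helpers as used above; it is an illustration, not the verbatim Hive implementation.

private SegmentIdWithShardSpec getSegmentIdentifierAndMaybePush(long timestamp) throws IOException {
    // Hypothetical sketch; the real method in DruidRecordWriter may differ.
    final DateTime bucketStart = segmentGranularity.bucketStart(DateTimes.utc(timestamp));
    final Interval interval = new Interval(bucketStart, segmentGranularity.increment(bucketStart));
    // Reuse the open segment while the row falls in the same time bucket and the segment has room.
    if (currentOpenSegment != null
            && currentOpenSegment.getInterval().equals(interval)
            && appenderator.getRowCount(currentOpenSegment) < maxPartitionSize) {
        return currentOpenSegment;
    }
    int nextPartitionNum = 0;
    if (currentOpenSegment != null) {
        if (currentOpenSegment.getInterval().equals(interval)) {
            // Same time bucket but the segment is full: roll over to the next partition number.
            nextPartitionNum = currentOpenSegment.getShardSpec().getPartitionNum() + 1;
        }
        pushSegments(ImmutableList.of(currentOpenSegment));
    }
    currentOpenSegment = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval, tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(nextPartitionNum));
    return currentOpenSegment;
}

In this sketch the caller in write passes the returned identifier straight to appenderator.add, so the size check happens once per row, before the row is appended.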
Also used: SegmentNotWritableException (org.apache.druid.segment.realtime.appenderator.SegmentNotWritableException), LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec), IOException (java.io.IOException), SegmentIdWithShardSpec (org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec), DateTime (org.joda.time.DateTime), DruidWritable (org.apache.hadoop.hive.druid.serde.DruidWritable), InputRow (org.apache.druid.data.input.InputRow), MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow), Interval (org.joda.time.Interval)

Aggregations

IOException (java.io.IOException): 1
InputRow (org.apache.druid.data.input.InputRow): 1
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 1
SegmentIdWithShardSpec (org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec): 1
SegmentNotWritableException (org.apache.druid.segment.realtime.appenderator.SegmentNotWritableException): 1
LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec): 1
DruidWritable (org.apache.hadoop.hive.druid.serde.DruidWritable): 1
DateTime (org.joda.time.DateTime): 1
Interval (org.joda.time.Interval): 1