Use of org.apache.druid.timeline.partition.NoneShardSpec in project druid by druid-io.
From the class IndexerSQLMetadataStorageCoordinator, method announceHistoricalSegmentBatch:
/**
 * Attempts to insert a batch of segments into the database. Segments that already exist are skipped; however,
 * this check is imperfect and callers must be prepared to retry their entire transaction on exceptions.
 *
 * @return the set of DataSegments inserted
 */
private Set<DataSegment> announceHistoricalSegmentBatch(
    final Handle handle,
    final Set<DataSegment> segments,
    final Set<DataSegment> usedSegments
) throws IOException
{
  final Set<DataSegment> toInsertSegments = new HashSet<>();
  try {
    Set<String> existedSegments = segmentExistsBatch(handle, segments);
    log.info("Found these segments already exist in DB: %s", existedSegments);
    for (DataSegment segment : segments) {
      if (!existedSegments.contains(segment.getId().toString())) {
        toInsertSegments.add(segment);
      }
    }

    // SELECT -> INSERT can fail due to races; callers must be prepared to retry.
    // Avoiding ON DUPLICATE KEY since it's not portable.
    // Avoiding try/catch since it may cause inadvertent transaction-splitting.
    final List<List<DataSegment>> partitionedSegments = Lists.partition(
        new ArrayList<>(toInsertSegments),
        MAX_NUM_SEGMENTS_TO_ANNOUNCE_AT_ONCE
    );

    PreparedBatch preparedBatch = handle.prepareBatch(
        StringUtils.format(
            "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, partitioned, version, used, payload) "
            + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)",
            dbTables.getSegmentsTable(),
            connector.getQuoteString()
        )
    );

    for (List<DataSegment> partition : partitionedSegments) {
      for (DataSegment segment : partition) {
        preparedBatch.add()
                     .bind("id", segment.getId().toString())
                     .bind("dataSource", segment.getDataSource())
                     .bind("created_date", DateTimes.nowUtc().toString())
                     .bind("start", segment.getInterval().getStart().toString())
                     .bind("end", segment.getInterval().getEnd().toString())
                     .bind("partitioned", (segment.getShardSpec() instanceof NoneShardSpec) ? false : true)
                     .bind("version", segment.getVersion())
                     .bind("used", usedSegments.contains(segment))
                     .bind("payload", jsonMapper.writeValueAsBytes(segment));
      }
      final int[] affectedRows = preparedBatch.execute();
      final boolean succeeded = Arrays.stream(affectedRows).allMatch(eachAffectedRows -> eachAffectedRows == 1);
      if (succeeded) {
        log.infoSegments(partition, "Published segments to DB");
      } else {
        final List<DataSegment> failedToPublish = IntStream.range(0, partition.size())
                                                           .filter(i -> affectedRows[i] != 1)
                                                           .mapToObj(partition::get)
                                                           .collect(Collectors.toList());
        throw new ISE("Failed to publish segments to DB: %s", SegmentUtils.commaSeparatedIdentifiers(failedToPublish));
      }
    }
  }
  catch (Exception e) {
    log.errorSegments(segments, "Exception inserting segments");
    throw e;
  }
  return toInsertSegments;
}
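
As the Javadoc and the inline comments note, the SELECT -> INSERT sequence can lose a race with a concurrent writer, and the caller is expected to retry the whole transaction. Below is a minimal retry sketch, assuming a hypothetical publishInTransaction() wrapper that runs the batch announcement above inside a single metadata-store transaction; the wrapper name and the attempt bound are illustrative, not part of the Druid code.

import java.util.Set;
import org.apache.druid.timeline.DataSegment;

// Sketch only: retry the whole publish transaction on failure, as the Javadoc requires.
// publishInTransaction() is a hypothetical wrapper around announceHistoricalSegmentBatch;
// MAX_ATTEMPTS is an assumed bound, not taken from the Druid code.
Set<DataSegment> announceWithRetry(Set<DataSegment> segments, Set<DataSegment> usedSegments)
{
  final int MAX_ATTEMPTS = 3;
  for (int attempt = 1; ; attempt++) {
    try {
      // Re-running the transaction repeats the SELECT, so segments that a concurrent
      // writer inserted between our SELECT and INSERT are simply skipped this time.
      return publishInTransaction(segments, usedSegments);
    }
    catch (Exception e) {
      if (attempt >= MAX_ATTEMPTS) {
        throw new RuntimeException("Failed to publish segments after " + attempt + " attempts", e);
      }
    }
  }
}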
Use of org.apache.druid.timeline.partition.NoneShardSpec in project hive by apache.
From the class DruidStorageHandlerUtils, method publishSegmentsAndCommit:
/**
 * First computes the segment timeline to accommodate the new segments (for the INSERT INTO case),
 * then moves the segments to Druid deep storage with updated metadata/version.
 * All of this is done in a single transaction.
 *
 * @param connector                   DBI connector used to commit
 * @param metadataStorageTablesConfig Druid metadata tables definitions
 * @param dataSource                  Druid datasource name
 * @param segments                    list of segments to move and commit to metadata
 * @param overwrite                   whether this is an insert overwrite
 * @param conf                        Configuration
 * @param dataSegmentPusher           segment pusher
 *
 * @return list of successfully published Druid segments, with the updated versions and metadata
 *         reflecting the move and timeline sorting
 *
 * @throws CallbackFailedException in case the connector cannot add a segment to the DB.
 */
@SuppressWarnings("unchecked")
static List<DataSegment> publishSegmentsAndCommit(
    final SQLMetadataConnector connector,
    final MetadataStorageTablesConfig metadataStorageTablesConfig,
    final String dataSource,
    final List<DataSegment> segments,
    boolean overwrite,
    Configuration conf,
    DataSegmentPusher dataSegmentPusher
) throws CallbackFailedException {
  return connector.getDBI().inTransaction((handle, transactionStatus) -> {
    // We create the timeline for the existing and new segments
    VersionedIntervalTimeline<String, DataSegment> timeline;
    if (overwrite) {
      // If we are overwriting, we disable existing sources
      disableDataSourceWithHandle(handle, metadataStorageTablesConfig, dataSource);
      // When overwriting, we just start with an empty timeline,
      // as we are overwriting segments with new versions
      timeline = new VersionedIntervalTimeline<>(Ordering.natural());
    } else {
      // Append mode
      if (segments.isEmpty()) {
        // If there are no new segments, we can just bail out
        return Collections.EMPTY_LIST;
      }
      // Otherwise, build a timeline of existing segments in metadata storage
      Interval indexedInterval = JodaUtils.umbrellaInterval(
          segments.stream().map(DataSegment::getInterval).collect(Collectors.toList())
      );
      LOG.info("Building timeline for umbrella Interval [{}]", indexedInterval);
      timeline = getTimelineForIntervalWithHandle(handle, dataSource, indexedInterval, metadataStorageTablesConfig);
    }

    final List<DataSegment> finalSegmentsToPublish = Lists.newArrayList();
    for (DataSegment segment : segments) {
      List<TimelineObjectHolder<String, DataSegment>> existingChunks = timeline.lookup(segment.getInterval());
      if (existingChunks.size() > 1) {
        // Druid shard specs do not support multiple partitions for the same interval with different granularity.
        throw new IllegalStateException(String.format(
            "Cannot allocate new segment for dataSource[%s], interval[%s], already have [%,d] chunks. "
                + "Not possible to append new segment.",
            dataSource, segment.getInterval(), existingChunks.size()
        ));
      }
      // Find the segment with the latest version and maximum partition number
      SegmentIdWithShardSpec max = null;
      final ShardSpec newShardSpec;
      final String newVersion;
      if (!existingChunks.isEmpty()) {
        // Some existing chunk, find max
        TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks);
        for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
          if (max == null
              || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
            max = SegmentIdWithShardSpec.fromDataSegment(existing.getObject());
          }
        }
      }
      if (max == null) {
        // No existing shard present in the database, use the current version.
        newShardSpec = segment.getShardSpec();
        newVersion = segment.getVersion();
      } else {
        // Use the version of the existing max segment to generate the new shard spec
        newShardSpec = getNextPartitionShardSpec(max.getShardSpec());
        newVersion = max.getVersion();
      }
      DataSegment publishedSegment = publishSegmentWithShardSpec(
          segment, newShardSpec, newVersion, getPath(segment).getFileSystem(conf), dataSegmentPusher
      );
      finalSegmentsToPublish.add(publishedSegment);
      timeline.add(
          publishedSegment.getInterval(),
          publishedSegment.getVersion(),
          publishedSegment.getShardSpec().createChunk(publishedSegment)
      );
    }

    // Publish new segments to metadata storage
    final PreparedBatch batch = handle.prepareBatch(String.format(
        "INSERT INTO %1$s (id, dataSource, created_date, start, \"end\", partitioned, version, used, payload) "
            + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)",
        metadataStorageTablesConfig.getSegmentsTable()
    ));
    for (final DataSegment segment : finalSegmentsToPublish) {
      batch.add(new ImmutableMap.Builder<String, Object>()
          .put("id", segment.getId().toString())
          .put("dataSource", segment.getDataSource())
          .put("created_date", new DateTime().toString())
          .put("start", segment.getInterval().getStart().toString())
          .put("end", segment.getInterval().getEnd().toString())
          .put("partitioned", !(segment.getShardSpec() instanceof NoneShardSpec))
          .put("version", segment.getVersion())
          .put("used", true)
          .put("payload", JSON_MAPPER.writeValueAsBytes(segment))
          .build());
      LOG.info("Published {}", segment.getId().toString());
    }
    batch.execute();
    return finalSegmentsToPublish;
  });
}
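
The append path above relies on getNextPartitionShardSpec(max.getShardSpec()) to place the new segment after the current maximum partition. The sketch below shows what such a helper could look like under the assumption that only linear and numbered shard specs can be appended to; it is an illustration of the idea, not the actual Hive implementation.

import org.apache.druid.timeline.partition.LinearShardSpec;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.apache.druid.timeline.partition.ShardSpec;

// Sketch only: derive a shard spec for the next partition from the current maximum.
// The real getNextPartitionShardSpec() in DruidStorageHandlerUtils may differ.
static ShardSpec nextPartitionShardSpec(ShardSpec max)
{
  if (max instanceof LinearShardSpec) {
    // Linear specs grow by bumping the partition number.
    return new LinearShardSpec(max.getPartitionNum() + 1);
  }
  if (max instanceof NumberedShardSpec) {
    // Numbered specs carry a core partition count alongside the partition number.
    NumberedShardSpec numbered = (NumberedShardSpec) max;
    return new NumberedShardSpec(numbered.getPartitionNum() + 1, numbered.getPartitions());
  }
  // NoneShardSpec (and any other spec) cannot be appended to in this sketch.
  throw new IllegalStateException("Cannot append to shard spec type: " + max.getClass().getName());
}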