Search in sources:

Example 1 with SQLMetadataConnector

Use of io.druid.metadata.SQLMetadataConnector in project hive by apache.

From the class DruidStorageHandlerUtils, the method publishSegmentsAndCommit:

/**
 * First computes the segment timeline to accommodate the new segments (the INSERT INTO case),
 * then moves the segments to Druid deep storage with updated metadata/version.
 * ALL OF THIS IS DONE IN ONE TRANSACTION.
 *
 * @param connector DBI connector used to commit the transaction
 * @param metadataStorageTablesConfig Druid metadata table definitions
 * @param dataSource Druid datasource name
 * @param segments list of segments to move and commit to metadata storage
 * @param overwrite whether this is an INSERT OVERWRITE
 * @param conf Hadoop configuration
 * @param dataSegmentPusher segment pusher used to move segments to deep storage
 *
 * @return list of successfully published Druid segments, with the updated versions and
 *         metadata resulting from the move and the timeline sorting
 *
 * @throws CallbackFailedException if the transaction callback fails
 */
public static List<DataSegment> publishSegmentsAndCommit(final SQLMetadataConnector connector,
        final MetadataStorageTablesConfig metadataStorageTablesConfig, final String dataSource,
        final List<DataSegment> segments, boolean overwrite, Configuration conf,
        DataSegmentPusher dataSegmentPusher) throws CallbackFailedException {
    return connector.getDBI().inTransaction((handle, transactionStatus) -> {
        // We create the timeline for the existing and new segments
        VersionedIntervalTimeline<String, DataSegment> timeline;
        if (overwrite) {
            // If we are overwriting, we disable existing sources
            disableDataSourceWithHandle(handle, metadataStorageTablesConfig, dataSource);
            // When overwriting, we just start with empty timeline,
            // as we are overwriting segments with new versions
            timeline = new VersionedIntervalTimeline<>(Ordering.natural());
        } else {
            // Append Mode
            if (segments.isEmpty()) {
                // If there are no new segments, we can just bail out
                return Collections.emptyList();
            }
            // Otherwise, build a timeline of existing segments in metadata storage
            Interval indexedInterval = JodaUtils.umbrellaInterval(Iterables.transform(segments, input -> input.getInterval()));
            LOG.info("Building timeline for umbrella Interval [{}]", indexedInterval);
            timeline = getTimelineForIntervalWithHandle(handle, dataSource, indexedInterval, metadataStorageTablesConfig);
        }
        final List<DataSegment> finalSegmentsToPublish = Lists.newArrayList();
        for (DataSegment segment : segments) {
            List<TimelineObjectHolder<String, DataSegment>> existingChunks = timeline.lookup(segment.getInterval());
            if (existingChunks.size() > 1) {
                // Druid shard specs do not support multiple partitions for the same interval with different granularity.
                throw new IllegalStateException(String.format("Cannot allocate new segment for dataSource[%s], interval[%s], already have [%,d] chunks. Not possible to append new segment.", dataSource, segment.getInterval(), existingChunks.size()));
            }
            // Find the segment with the latest version and the maximum partition number
            SegmentIdentifier max = null;
            final ShardSpec newShardSpec;
            final String newVersion;
            if (!existingChunks.isEmpty()) {
                // There is an existing chunk; find the segment with the max partition number
                TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks);
                for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
                    if (max == null || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
                        max = SegmentIdentifier.fromDataSegment(existing.getObject());
                    }
                }
            }
            if (max == null) {
                // No existing shard is present in the database; keep the segment's current shard spec and version.
                newShardSpec = segment.getShardSpec();
                newVersion = segment.getVersion();
            } else {
                // Use the version of the existing max segment and allocate the next partition's shard spec
                newShardSpec = getNextPartitionShardSpec(max.getShardSpec());
                newVersion = max.getVersion();
            }
            DataSegment publishedSegment = publishSegmentWithShardSpec(segment, newShardSpec, newVersion, getPath(segment).getFileSystem(conf), dataSegmentPusher);
            finalSegmentsToPublish.add(publishedSegment);
            timeline.add(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().createChunk(publishedSegment));
        }
        // Publish the new segments to the metadata storage segments table
        final PreparedBatch batch = handle.prepareBatch(String.format(
                "INSERT INTO %1$s (id, dataSource, created_date, start, \"end\", partitioned, version, used, payload) "
                        + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)",
                metadataStorageTablesConfig.getSegmentsTable()));
        for (final DataSegment segment : finalSegmentsToPublish) {
            batch.add(new ImmutableMap.Builder<String, Object>()
                    .put("id", segment.getIdentifier())
                    .put("dataSource", segment.getDataSource())
                    .put("created_date", new DateTime().toString())
                    .put("start", segment.getInterval().getStart().toString())
                    .put("end", segment.getInterval().getEnd().toString())
                    .put("partitioned", !(segment.getShardSpec() instanceof NoneShardSpec))
                    .put("version", segment.getVersion())
                    .put("used", true)
                    .put("payload", JSON_MAPPER.writeValueAsBytes(segment))
                    .build());
            LOG.info("Published {}", segment.getIdentifier());
        }
        batch.execute();
        return finalSegmentsToPublish;
    });
}
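
For context, here is a minimal caller sketch. It assumes a wired-up SQLMetadataConnector, MetadataStorageTablesConfig, Hadoop Configuration and DataSegmentPusher; the variable names (connector, tablesConfig, conf, pusher, builtSegments) and the datasource name are hypothetical and not taken from the Hive source.

// Hypothetical caller: commit freshly built segments in append mode (overwrite = false).
// connector, tablesConfig, conf, pusher and builtSegments are assumed to exist already.
List<DataSegment> published = DruidStorageHandlerUtils.publishSegmentsAndCommit(
        connector,       // SQLMetadataConnector, e.g. a MySQLConnector
        tablesConfig,    // MetadataStorageTablesConfig naming the segments table
        "my_datasource", // Druid datasource name (hypothetical)
        builtSegments,   // List<DataSegment> produced by the Hive insert
        false,           // append rather than INSERT OVERWRITE
        conf,            // Hadoop Configuration used to resolve the segment FileSystem
        pusher);         // DataSegmentPusher, e.g. HdfsDataSegmentPusher
for (DataSegment segment : published) {
    LOG.info("Committed {} with version {}", segment.getIdentifier(), segment.getVersion());
}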
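
The helper getNextPartitionShardSpec is referenced above but not shown. The following is only a plausible sketch, inferred from how its result is used (allocating the next partition of an interval that already has segments), not copied from the Hive source.

// Sketch, not the actual Hive implementation: keep the shard spec type and bump the partition number.
private static ShardSpec getNextPartitionShardSpec(ShardSpec shardSpec) {
    if (shardSpec instanceof LinearShardSpec) {
        // Linear shards only need the next partition number.
        return new LinearShardSpec(shardSpec.getPartitionNum() + 1);
    } else if (shardSpec instanceof NumberedShardSpec) {
        // Numbered shards keep their declared core partition count.
        return new NumberedShardSpec(shardSpec.getPartitionNum() + 1,
                ((NumberedShardSpec) shardSpec).getPartitions());
    } else {
        // Other shard spec types (e.g. hashed) would not support this kind of append.
        throw new IllegalStateException(String.format("Cannot allocate next partition for shard spec [%s]", shardSpec));
    }
}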
Also used : SQLMetadataConnector(io.druid.metadata.SQLMetadataConnector) FoldController(org.skife.jdbi.v2.FoldController) Request(com.metamx.http.client.Request) FileSystem(org.apache.hadoop.fs.FileSystem) URL(java.net.URL) HttpMethod(org.jboss.netty.handler.codec.http.HttpMethod) LoggerFactory(org.slf4j.LoggerFactory) RetryPolicies(org.apache.hadoop.io.retry.RetryPolicies) FileStatus(org.apache.hadoop.fs.FileStatus) StatementContext(org.skife.jdbi.v2.StatementContext) InetAddress(java.net.InetAddress) SelectQueryConfig(io.druid.query.select.SelectQueryConfig) InputStreamResponseHandler(com.metamx.http.client.response.InputStreamResponseHandler) IndexIO(io.druid.segment.IndexIO) CharStreams(com.google.common.io.CharStreams) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) Path(org.apache.hadoop.fs.Path) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) TimestampFloorExprMacro(io.druid.query.expression.TimestampFloorExprMacro) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) ByteArrayMapper(org.skife.jdbi.v2.util.ByteArrayMapper) DataSegment(io.druid.timeline.DataSegment) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) MapUtils(com.metamx.common.MapUtils) Collection(java.util.Collection) Set(java.util.Set) Interner(com.google.common.collect.Interner) Reader(java.io.Reader) MetadataStorageTablesConfig(io.druid.metadata.MetadataStorageTablesConfig) FileNotFoundException(java.io.FileNotFoundException) TimestampParseExprMacro(io.druid.query.expression.TimestampParseExprMacro) List(java.util.List) PartitionChunk(io.druid.timeline.partition.PartitionChunk) ISOChronology(org.joda.time.chrono.ISOChronology) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) TrimExprMacro(io.druid.query.expression.TrimExprMacro) HttpClient(com.metamx.http.client.HttpClient) Iterables(com.google.common.collect.Iterables) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) TimestampFormatExprMacro(io.druid.query.expression.TimestampFormatExprMacro) SegmentIdentifier(io.druid.segment.realtime.appenderator.SegmentIdentifier) TimestampExtractExprMacro(io.druid.query.expression.TimestampExtractExprMacro) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) RegexpExtractExprMacro(io.druid.query.expression.RegexpExtractExprMacro) LikeExprMacro(io.druid.query.expression.LikeExprMacro) TimestampCeilExprMacro(io.druid.query.expression.TimestampCeilExprMacro) ShardSpec(io.druid.timeline.partition.ShardSpec) ArrayList(java.util.ArrayList) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) HashSet(java.util.HashSet) IndexMergerV9(io.druid.segment.IndexMergerV9) Interval(org.joda.time.Interval) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) JodaUtils(com.metamx.common.JodaUtils) ImmutableList(com.google.common.collect.ImmutableList) StringUtils(org.apache.hadoop.util.StringUtils) ResultIterator(org.skife.jdbi.v2.ResultIterator) TimestampShiftExprMacro(io.druid.query.expression.TimestampShiftExprMacro) OutputStream(java.io.OutputStream) HttpHeaders(org.jboss.netty.handler.codec.http.HttpHeaders) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) Logger(org.slf4j.Logger) Folder3(org.skife.jdbi.v2.Folder3) 
HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) EmittingLogger(com.metamx.emitter.EmittingLogger) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) Throwables(com.google.common.base.Throwables) Interners(com.google.common.collect.Interners) Query(org.skife.jdbi.v2.Query) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) UnknownHostException(java.net.UnknownHostException) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) HdfsDataSegmentPusherConfig(io.druid.storage.hdfs.HdfsDataSegmentPusherConfig) Handle(org.skife.jdbi.v2.Handle) Ordering(com.google.common.collect.Ordering) ExprMacroTable(io.druid.math.expr.ExprMacroTable) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) HiveDruidSerializationModule(org.apache.hadoop.hive.druid.serde.HiveDruidSerializationModule) RetryProxy(org.apache.hadoop.io.retry.RetryProxy) NoopEmitter(com.metamx.emitter.core.NoopEmitter) ServiceEmitter(com.metamx.emitter.service.ServiceEmitter) Collections(java.util.Collections) MySQLConnector(io.druid.metadata.storage.mysql.MySQLConnector) InputStream(java.io.InputStream)
