
Example 6 with VersionedIntervalTimeline

use of io.druid.timeline.VersionedIntervalTimeline in project hive by apache.

the class DruidStorageHandlerUtils method getTimelineForIntervalWithHandle.

private static VersionedIntervalTimeline<String, DataSegment> getTimelineForIntervalWithHandle(final Handle handle, final String dataSource, final Interval interval, final MetadataStorageTablesConfig dbTables) throws IOException {
    Query<Map<String, Object>> sql = handle.createQuery(
            String.format(
                    "SELECT payload FROM %s WHERE used = true AND dataSource = ? AND start <= ? AND \"end\" >= ?",
                    dbTables.getSegmentsTable()))
            .bind(0, dataSource)
            .bind(1, interval.getEnd().toString())
            .bind(2, interval.getStart().toString());
    final VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
    final ResultIterator<byte[]> dbSegments = sql.map(ByteArrayMapper.FIRST).iterator();
    try {
        while (dbSegments.hasNext()) {
            final byte[] payload = dbSegments.next();
            DataSegment segment = JSON_MAPPER.readValue(payload, DataSegment.class);
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
    } finally {
        dbSegments.close();
    }
    return timeline;
}
Also used : VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) DataSegment(io.druid.timeline.DataSegment)
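
For context, a minimal usage sketch (not part of the original example) showing how the timeline built above might be consulted. The dataSource name and interval are illustrative, and handle/dbTables are assumed to be in scope as in the method signature.

Interval interval = new Interval("2014-01-01/2014-02-01");
// Hypothetical call; getTimelineForIntervalWithHandle is private in DruidStorageHandlerUtils.
VersionedIntervalTimeline<String, DataSegment> timeline = getTimelineForIntervalWithHandle(handle, "wikipedia", interval, dbTables);
// lookup() returns only the visible (non-overshadowed) chunks covering the interval.
for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(interval)) {
    for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
        DataSegment visibleSegment = chunk.getObject();
    }
}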

Example 7 with VersionedIntervalTimeline

use of io.druid.timeline.VersionedIntervalTimeline in project hive by apache.

the class DruidStorageHandlerUtils method publishSegmentsAndCommit.

/**
 * First computes the segment timeline to accommodate the new segments (INSERT INTO case),
 * then moves the segments to Druid deep storage with updated metadata/version.
 * All of this is done in a single transaction.
 *
 * @param connector DBI connector to commit
 * @param metadataStorageTablesConfig Druid metadata tables definitions
 * @param dataSource Druid datasource name
 * @param segments List of segments to move and commit to metadata
 * @param overwrite if it is an insert overwrite
 * @param conf Configuration
 * @param dataSegmentPusher segment pusher
 *
 * @return List of successfully published Druid segments.
 * This list has the updated versions and metadata about segments after move and timeline sorting
 *
 * @throws CallbackFailedException
 */
public static List<DataSegment> publishSegmentsAndCommit(final SQLMetadataConnector connector, final MetadataStorageTablesConfig metadataStorageTablesConfig, final String dataSource, final List<DataSegment> segments, boolean overwrite, Configuration conf, DataSegmentPusher dataSegmentPusher) throws CallbackFailedException {
    return connector.getDBI().inTransaction((handle, transactionStatus) -> {
        // We create the timeline for the existing and new segments
        VersionedIntervalTimeline<String, DataSegment> timeline;
        if (overwrite) {
            // If we are overwriting, we disable the datasource's existing segments first
            disableDataSourceWithHandle(handle, metadataStorageTablesConfig, dataSource);
            // When overwriting, we just start with empty timeline,
            // as we are overwriting segments with new versions
            timeline = new VersionedIntervalTimeline<>(Ordering.natural());
        } else {
            // Append Mode
            if (segments.isEmpty()) {
                // If there are no new segments, we can just bail out
                return Collections.EMPTY_LIST;
            }
            // Otherwise, build a timeline of existing segments in metadata storage
            Interval indexedInterval = JodaUtils.umbrellaInterval(Iterables.transform(segments, input -> input.getInterval()));
            LOG.info("Building timeline for umbrella Interval [{}]", indexedInterval);
            timeline = getTimelineForIntervalWithHandle(handle, dataSource, indexedInterval, metadataStorageTablesConfig);
        }
        final List<DataSegment> finalSegmentsToPublish = Lists.newArrayList();
        for (DataSegment segment : segments) {
            List<TimelineObjectHolder<String, DataSegment>> existingChunks = timeline.lookup(segment.getInterval());
            if (existingChunks.size() > 1) {
                // Druid shard specs do not support multiple partitions for the same interval with different granularities.
                throw new IllegalStateException(String.format("Cannot allocate new segment for dataSource[%s], interval[%s], already have [%,d] chunks. Not possible to append new segment.", dataSource, segment.getInterval(), existingChunks.size()));
            }
            // Find out the segment with latest version and maximum partition number
            SegmentIdentifier max = null;
            final ShardSpec newShardSpec;
            final String newVersion;
            if (!existingChunks.isEmpty()) {
                // Existing chunks found; find the segment with the highest partition number
                TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks);
                for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
                    if (max == null || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
                        max = SegmentIdentifier.fromDataSegment(existing.getObject());
                    }
                }
            }
            if (max == null) {
                // No existing shard present in the database, use the current version.
                newShardSpec = segment.getShardSpec();
                newVersion = segment.getVersion();
            } else {
                // use version of existing max segment to generate new shard spec
                newShardSpec = getNextPartitionShardSpec(max.getShardSpec());
                newVersion = max.getVersion();
            }
            DataSegment publishedSegment = publishSegmentWithShardSpec(segment, newShardSpec, newVersion, getPath(segment).getFileSystem(conf), dataSegmentPusher);
            finalSegmentsToPublish.add(publishedSegment);
            timeline.add(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().createChunk(publishedSegment));
        }
        // Publish new segments to metadata storage
        final PreparedBatch batch = handle.prepareBatch(String.format("INSERT INTO %1$s (id, dataSource, created_date, start, \"end\", partitioned, version, used, payload) " + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)", metadataStorageTablesConfig.getSegmentsTable()));
        for (final DataSegment segment : finalSegmentsToPublish) {
            batch.add(new ImmutableMap.Builder<String, Object>()
                    .put("id", segment.getIdentifier())
                    .put("dataSource", segment.getDataSource())
                    .put("created_date", new DateTime().toString())
                    .put("start", segment.getInterval().getStart().toString())
                    .put("end", segment.getInterval().getEnd().toString())
                    .put("partitioned", (segment.getShardSpec() instanceof NoneShardSpec) ? false : true)
                    .put("version", segment.getVersion())
                    .put("used", true)
                    .put("payload", JSON_MAPPER.writeValueAsBytes(segment))
                    .build());
            LOG.info("Published {}", segment.getIdentifier());
        }
        batch.execute();
        return finalSegmentsToPublish;
    });
}
Also used : SQLMetadataConnector(io.druid.metadata.SQLMetadataConnector) FoldController(org.skife.jdbi.v2.FoldController) Request(com.metamx.http.client.Request) FileSystem(org.apache.hadoop.fs.FileSystem) URL(java.net.URL) HttpMethod(org.jboss.netty.handler.codec.http.HttpMethod) LoggerFactory(org.slf4j.LoggerFactory) RetryPolicies(org.apache.hadoop.io.retry.RetryPolicies) FileStatus(org.apache.hadoop.fs.FileStatus) StatementContext(org.skife.jdbi.v2.StatementContext) InetAddress(java.net.InetAddress) SelectQueryConfig(io.druid.query.select.SelectQueryConfig) InputStreamResponseHandler(com.metamx.http.client.response.InputStreamResponseHandler) IndexIO(io.druid.segment.IndexIO) CharStreams(com.google.common.io.CharStreams) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) Path(org.apache.hadoop.fs.Path) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) TimestampFloorExprMacro(io.druid.query.expression.TimestampFloorExprMacro) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) ByteArrayMapper(org.skife.jdbi.v2.util.ByteArrayMapper) DataSegment(io.druid.timeline.DataSegment) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) MapUtils(com.metamx.common.MapUtils) Collection(java.util.Collection) Set(java.util.Set) Interner(com.google.common.collect.Interner) Reader(java.io.Reader) MetadataStorageTablesConfig(io.druid.metadata.MetadataStorageTablesConfig) FileNotFoundException(java.io.FileNotFoundException) TimestampParseExprMacro(io.druid.query.expression.TimestampParseExprMacro) List(java.util.List) PartitionChunk(io.druid.timeline.partition.PartitionChunk) ISOChronology(org.joda.time.chrono.ISOChronology) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) TrimExprMacro(io.druid.query.expression.TrimExprMacro) HttpClient(com.metamx.http.client.HttpClient) Iterables(com.google.common.collect.Iterables) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) TimestampFormatExprMacro(io.druid.query.expression.TimestampFormatExprMacro) SegmentIdentifier(io.druid.segment.realtime.appenderator.SegmentIdentifier) TimestampExtractExprMacro(io.druid.query.expression.TimestampExtractExprMacro) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) RegexpExtractExprMacro(io.druid.query.expression.RegexpExtractExprMacro) LikeExprMacro(io.druid.query.expression.LikeExprMacro) TimestampCeilExprMacro(io.druid.query.expression.TimestampCeilExprMacro) ShardSpec(io.druid.timeline.partition.ShardSpec) ArrayList(java.util.ArrayList) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) HashSet(java.util.HashSet) IndexMergerV9(io.druid.segment.IndexMergerV9) Interval(org.joda.time.Interval) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) JodaUtils(com.metamx.common.JodaUtils) ImmutableList(com.google.common.collect.ImmutableList) StringUtils(org.apache.hadoop.util.StringUtils) ResultIterator(org.skife.jdbi.v2.ResultIterator) TimestampShiftExprMacro(io.druid.query.expression.TimestampShiftExprMacro) OutputStream(java.io.OutputStream) HttpHeaders(org.jboss.netty.handler.codec.http.HttpHeaders) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) Logger(org.slf4j.Logger) Folder3(org.skife.jdbi.v2.Folder3) 
HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) EmittingLogger(com.metamx.emitter.EmittingLogger) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) Throwables(com.google.common.base.Throwables) Interners(com.google.common.collect.Interners) Query(org.skife.jdbi.v2.Query) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) UnknownHostException(java.net.UnknownHostException) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) HdfsDataSegmentPusherConfig(io.druid.storage.hdfs.HdfsDataSegmentPusherConfig) Handle(org.skife.jdbi.v2.Handle) Ordering(com.google.common.collect.Ordering) ExprMacroTable(io.druid.math.expr.ExprMacroTable) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) HiveDruidSerializationModule(org.apache.hadoop.hive.druid.serde.HiveDruidSerializationModule) RetryProxy(org.apache.hadoop.io.retry.RetryProxy) NoopEmitter(com.metamx.emitter.core.NoopEmitter) ServiceEmitter(com.metamx.emitter.service.ServiceEmitter) Collections(java.util.Collections) MySQLConnector(io.druid.metadata.storage.mysql.MySQLConnector) InputStream(java.io.InputStream)
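
The helper getNextPartitionShardSpec referenced above is not shown in this example. A plausible sketch, assuming it simply bumps the partition number of the max shard spec and that only LinearShardSpec and NumberedShardSpec are supported for appends (the actual implementation may differ):

private static ShardSpec getNextPartitionShardSpec(ShardSpec shardSpec) {
    // Hypothetical reconstruction: allocate the next partition number after the current max.
    if (shardSpec instanceof LinearShardSpec) {
        return new LinearShardSpec(shardSpec.getPartitionNum() + 1);
    } else if (shardSpec instanceof NumberedShardSpec) {
        return new NumberedShardSpec(shardSpec.getPartitionNum() + 1, ((NumberedShardSpec) shardSpec).getPartitions());
    }
    // Appending does not make sense for other shard spec types.
    throw new IllegalStateException(String.format("Cannot compute next partition for shard spec [%s]", shardSpec));
}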

Example 8 with VersionedIntervalTimeline

use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.

the class ClientInfoResourceTest method setup.

@Before
public void setup() {
    VersionedIntervalTimeline<String, ServerSelector> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural());
    DruidServer server = new DruidServer("name", "host", 1234, "type", "tier", 0);
    addSegment(timeline, server, "1960-02-13/1961-02-14", ImmutableList.of("d5"), ImmutableList.of("m5"), "v0");
    // segments within [2014-02-13, 2014-02-18]
    addSegment(timeline, server, "2014-02-13/2014-02-14", ImmutableList.of("d1"), ImmutableList.of("m1"), "v0");
    addSegment(timeline, server, "2014-02-14/2014-02-15", ImmutableList.of("d1"), ImmutableList.of("m1"), "v0");
    addSegment(timeline, server, "2014-02-16/2014-02-17", ImmutableList.of("d1"), ImmutableList.of("m1"), "v0");
    addSegment(timeline, server, "2014-02-17/2014-02-18", ImmutableList.of("d2"), ImmutableList.of("m2"), "v0");
    // segments within [2015-02-01, 2015-02-13]
    addSegment(timeline, server, "2015-02-01/2015-02-07", ImmutableList.of("d1"), ImmutableList.of("m1"), "v1");
    addSegment(timeline, server, "2015-02-07/2015-02-13", ImmutableList.of("d1"), ImmutableList.of("m1"), "v1");
    addSegmentWithShardSpec(timeline, server, "2015-02-03/2015-02-05", ImmutableList.of("d1", "d2"), ImmutableList.of("m1", "m2"), "v2", new NumberedShardSpec(0, 2));
    addSegmentWithShardSpec(timeline, server, "2015-02-03/2015-02-05", ImmutableList.of("d1", "d2", "d3"), ImmutableList.of("m1", "m2", "m3"), "v2", new NumberedShardSpec(1, 2));
    addSegment(timeline, server, "2015-02-09/2015-02-10", ImmutableList.of("d1", "d3"), ImmutableList.of("m1", "m3"), "v2");
    addSegment(timeline, server, "2015-02-11/2015-02-12", ImmutableList.of("d3"), ImmutableList.of("m3"), "v2");
    // segments within [2015-03-13, 2015-03-19]
    addSegment(timeline, server, "2015-03-13/2015-03-19", ImmutableList.of("d1"), ImmutableList.of("m1"), "v3");
    addSegment(timeline, server, "2015-03-13/2015-03-14", ImmutableList.of("d1"), ImmutableList.of("m1"), "v4");
    addSegment(timeline, server, "2015-03-14/2015-03-15", ImmutableList.of("d1"), ImmutableList.of("m1"), "v5");
    addSegment(timeline, server, "2015-03-15/2015-03-16", ImmutableList.of("d1"), ImmutableList.of("m1"), "v6");
    // incomplete segment
    addSegmentWithShardSpec(timeline, server, "2015-04-03/2015-04-05", ImmutableList.of("d4"), ImmutableList.of("m4"), "v7", new NumberedShardSpec(0, 2));
    serverInventoryView = EasyMock.createMock(FilteredServerInventoryView.class);
    EasyMock.expect(serverInventoryView.getInventory()).andReturn(ImmutableList.of(server)).anyTimes();
    timelineServerView = EasyMock.createMock(TimelineServerView.class);
    EasyMock.expect(timelineServerView.getTimeline(EasyMock.anyObject(TableDataSource.class))).andReturn(timeline);
    EasyMock.replay(serverInventoryView, timelineServerView);
    resource = getResourceTestHelper(serverInventoryView, timelineServerView, new SegmentMetadataQueryConfig());
}
Also used : FilteredServerInventoryView(io.druid.client.FilteredServerInventoryView) ServerSelector(io.druid.client.selector.ServerSelector) TableDataSource(io.druid.query.TableDataSource) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) DruidServer(io.druid.client.DruidServer) TimelineServerView(io.druid.client.TimelineServerView) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) SegmentMetadataQueryConfig(io.druid.query.metadata.SegmentMetadataQueryConfig) Before(org.junit.Before)
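
The addSegment helper used in the setup above is not included in this excerpt. A hedged reconstruction of what it might look like, assuming DataSegment.builder(), DruidServer.addDataSegment, and a ServerSelector wrapped in a SingleElementPartitionChunk (the datasource name and segment size are placeholders, and the real test helper may differ):

private void addSegment(VersionedIntervalTimeline<String, ServerSelector> timeline, DruidServer server, String interval, List<String> dims, List<String> metrics, String version) {
    // Hypothetical sketch; builds a minimal segment, registers it on the server, and adds it to the timeline.
    DataSegment segment = DataSegment.builder()
            .dataSource("dataSource")
            .interval(new Interval(interval))
            .version(version)
            .dimensions(dims)
            .metrics(metrics)
            .size(1)
            .build();
    server.addDataSegment(segment.getIdentifier(), segment);
    ServerSelector selector = new ServerSelector(segment, null);
    timeline.add(new Interval(interval), version, new SingleElementPartitionChunk<>(selector));
}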

Example 9 with VersionedIntervalTimeline

use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.

the class NumberedShardSpecTest method testVersionedIntervalTimelineBehaviorForNumberedShardSpec.

private void testVersionedIntervalTimelineBehaviorForNumberedShardSpec(List<PartitionChunk<String>> chunks, Set<String> expectedObjects) {
    VersionedIntervalTimeline<String, String> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
    Interval interval = new Interval("2000/3000");
    String version = "v1";
    for (PartitionChunk<String> chunk : chunks) {
        timeline.add(interval, version, chunk);
    }
    Set<String> actualObjects = new HashSet<>();
    List<TimelineObjectHolder<String, String>> entries = timeline.lookup(interval);
    for (TimelineObjectHolder<String, String> entry : entries) {
        for (PartitionChunk<String> chunk : entry.getObject()) {
            actualObjects.add(chunk.getObject());
        }
    }
    Assert.assertEquals(expectedObjects, actualObjects);
}
Also used : TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) Interval(org.joda.time.Interval) HashSet(java.util.HashSet)
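
For illustration, a hedged sketch of how this helper might be driven from a test: the two chunks form a complete two-partition NumberedShardSpec set, so both payloads should be visible after lookup (the values here are assumptions, not taken from the original test class).

@Test
public void testCompleteNumberedSetIsFullyVisible() {
    // Two partitions of a complete NumberedShardSpec set: both objects should be returned by lookup().
    testVersionedIntervalTimelineBehaviorForNumberedShardSpec(
            ImmutableList.<PartitionChunk<String>>of(
                    new NumberedShardSpec(0, 2).createChunk("0"),
                    new NumberedShardSpec(1, 2).createChunk("1")),
            ImmutableSet.of("0", "1"));
}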

Example 10 with VersionedIntervalTimeline

use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.

the class DruidCoordinatorCleanupOvershadowed method run.

@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
    CoordinatorStats stats = new CoordinatorStats();
    // Stop serving old partitions once enough time has passed to make sure we aren't flapping with old data
    if (params.hasDeletionWaitTimeElapsed()) {
        DruidCluster cluster = params.getDruidCluster();
        Map<String, VersionedIntervalTimeline<String, DataSegment>> timelines = Maps.newHashMap();
        for (MinMaxPriorityQueue<ServerHolder> serverHolders : cluster.getSortedServersByTier()) {
            for (ServerHolder serverHolder : serverHolders) {
                ImmutableDruidServer server = serverHolder.getServer();
                for (ImmutableDruidDataSource dataSource : server.getDataSources()) {
                    VersionedIntervalTimeline<String, DataSegment> timeline = timelines.get(dataSource.getName());
                    if (timeline == null) {
                        timeline = new VersionedIntervalTimeline<>(Comparators.comparable());
                        timelines.put(dataSource.getName(), timeline);
                    }
                    for (DataSegment segment : dataSource.getSegments()) {
                        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
                    }
                }
            }
        }
        // Remove all segments in db that are overshadowed by served segments
        for (DataSegment dataSegment : params.getAvailableSegments()) {
            VersionedIntervalTimeline<String, DataSegment> timeline = timelines.get(dataSegment.getDataSource());
            if (timeline != null && timeline.isOvershadowed(dataSegment.getInterval(), dataSegment.getVersion())) {
                coordinator.removeSegment(dataSegment);
                stats.addToGlobalStat("overShadowedCount", 1);
            }
        }
    }
    return params.buildFromExisting().withCoordinatorStats(stats).build();
}
Also used : CoordinatorStats(io.druid.server.coordinator.CoordinatorStats) ImmutableDruidDataSource(io.druid.client.ImmutableDruidDataSource) ServerHolder(io.druid.server.coordinator.ServerHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) DruidCluster(io.druid.server.coordinator.DruidCluster) DataSegment(io.druid.timeline.DataSegment) ImmutableDruidServer(io.druid.client.ImmutableDruidServer)
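
As a minimal illustration (not from the coordinator source) of the isOvershadowed check this rule depends on: an interval with a lower version that is fully covered by a complete, higher-version entry in the timeline is reported as overshadowed.

VersionedIntervalTimeline<String, String> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
// A complete v2 entry covering all of January 2015.
timeline.add(new Interval("2015-01-01/2015-02-01"), "v2", new SingleElementPartitionChunk<>("newer"));
// A v1 segment whose interval lies entirely inside the v2 entry is overshadowed.
boolean overshadowed = timeline.isOvershadowed(new Interval("2015-01-10/2015-01-20"), "v1"); // true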

Aggregations

VersionedIntervalTimeline (io.druid.timeline.VersionedIntervalTimeline)15 DataSegment (io.druid.timeline.DataSegment)12 Interval (org.joda.time.Interval)10 TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder)7 IOException (java.io.IOException)5 Map (java.util.Map)5 PartitionChunk (io.druid.timeline.partition.PartitionChunk)4 HashSet (java.util.HashSet)4 Function (com.google.common.base.Function)3 DateTime (org.joda.time.DateTime)3 ImmutableMap (com.google.common.collect.ImmutableMap)2 CharSource (com.google.common.io.CharSource)2 IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment)2 Segment (io.druid.segment.Segment)2 IncrementalIndex (io.druid.segment.incremental.IncrementalIndex)2 OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex)2 CoordinatorStats (io.druid.server.coordinator.CoordinatorStats)2 NoneShardSpec (io.druid.timeline.partition.NoneShardSpec)2 ShardSpec (io.druid.timeline.partition.ShardSpec)2 SingleElementPartitionChunk (io.druid.timeline.partition.SingleElementPartitionChunk)2