Use of io.druid.timeline.VersionedIntervalTimeline in project hive by apache.
The class DruidStorageHandlerUtils, method getTimelineForIntervalWithHandle.
private static VersionedIntervalTimeline<String, DataSegment> getTimelineForIntervalWithHandle(final Handle handle,
    final String dataSource, final Interval interval, final MetadataStorageTablesConfig dbTables) throws IOException {
    Query<Map<String, Object>> sql = handle.createQuery(String.format(
            "SELECT payload FROM %s WHERE used = true AND dataSource = ? AND start <= ? AND \"end\" >= ?",
            dbTables.getSegmentsTable()))
        .bind(0, dataSource)
        .bind(1, interval.getEnd().toString())
        .bind(2, interval.getStart().toString());
    final VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
    final ResultIterator<byte[]> dbSegments = sql.map(ByteArrayMapper.FIRST).iterator();
    try {
        while (dbSegments.hasNext()) {
            final byte[] payload = dbSegments.next();
            DataSegment segment = JSON_MAPPER.readValue(payload, DataSegment.class);
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
    } finally {
        dbSegments.close();
    }
    return timeline;
}
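As a side note, here is a minimal sketch of how a caller might flatten a timeline built this way back into the list of visible segments for an interval. This helper is not part of DruidStorageHandlerUtils; the name segmentsVisibleFor is purely illustrative and it assumes the same imports as the method above plus java.util.ArrayList and java.util.List.

// Hypothetical helper, not from the Hive codebase: collects every DataSegment
// that the timeline exposes (i.e. non-overshadowed, complete chunks) for an interval.
private static List<DataSegment> segmentsVisibleFor(VersionedIntervalTimeline<String, DataSegment> timeline, Interval interval) {
    final List<DataSegment> visible = new ArrayList<>();
    for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(interval)) {
        for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
            visible.add(chunk.getObject());
        }
    }
    return visible;
}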
Use of io.druid.timeline.VersionedIntervalTimeline in project hive by apache.
The class DruidStorageHandlerUtils, method publishSegmentsAndCommit.
/**
 * First computes the segment timeline to accommodate the new segments (for the INSERT INTO case),
 * then moves the segments to Druid deep storage with updated metadata/version.
 * ALL OF THIS IS DONE IN ONE TRANSACTION.
 *
 * @param connector DBI connector used to commit
 * @param metadataStorageTablesConfig Druid metadata table definitions
 * @param dataSource Druid datasource name
 * @param segments list of segments to move and commit to metadata storage
 * @param overwrite true if this is an insert overwrite
 * @param conf Hadoop configuration
 * @param dataSegmentPusher segment pusher used to move segments to deep storage
 *
 * @return List of successfully published Druid segments.
 * This list has the updated versions and metadata about segments after the move and timeline sorting.
 *
 * @throws CallbackFailedException
 */
public static List<DataSegment> publishSegmentsAndCommit(final SQLMetadataConnector connector,
    final MetadataStorageTablesConfig metadataStorageTablesConfig, final String dataSource,
    final List<DataSegment> segments, boolean overwrite, Configuration conf,
    DataSegmentPusher dataSegmentPusher) throws CallbackFailedException {
    return connector.getDBI().inTransaction((handle, transactionStatus) -> {
        // We create the timeline for the existing and new segments
        VersionedIntervalTimeline<String, DataSegment> timeline;
        if (overwrite) {
            // If we are overwriting, we disable the existing sources
            disableDataSourceWithHandle(handle, metadataStorageTablesConfig, dataSource);
            // When overwriting, we just start with an empty timeline,
            // as we are overwriting segments with new versions
            timeline = new VersionedIntervalTimeline<>(Ordering.natural());
        } else {
            // Append mode
            if (segments.isEmpty()) {
                // If there are no new segments, we can just bail out
                return Collections.EMPTY_LIST;
            }
            // Otherwise, build a timeline of existing segments in metadata storage
            Interval indexedInterval = JodaUtils.umbrellaInterval(Iterables.transform(segments, input -> input.getInterval()));
            LOG.info("Building timeline for umbrella Interval [{}]", indexedInterval);
            timeline = getTimelineForIntervalWithHandle(handle, dataSource, indexedInterval, metadataStorageTablesConfig);
        }
        final List<DataSegment> finalSegmentsToPublish = Lists.newArrayList();
        for (DataSegment segment : segments) {
            List<TimelineObjectHolder<String, DataSegment>> existingChunks = timeline.lookup(segment.getInterval());
            if (existingChunks.size() > 1) {
                // Druid shard specs do not support multiple partitions for the same interval with different granularity.
                throw new IllegalStateException(String.format(
                    "Cannot allocate new segment for dataSource[%s], interval[%s], already have [%,d] chunks. Not possible to append new segment.",
                    dataSource, segment.getInterval(), existingChunks.size()));
            }
            // Find the segment with the latest version and the maximum partition number
            SegmentIdentifier max = null;
            final ShardSpec newShardSpec;
            final String newVersion;
            if (!existingChunks.isEmpty()) {
                // There are existing chunks; find the max
                TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks);
                for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
                    if (max == null || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
                        max = SegmentIdentifier.fromDataSegment(existing.getObject());
                    }
                }
            }
            if (max == null) {
                // No existing shard present in the database, use the current version.
                newShardSpec = segment.getShardSpec();
                newVersion = segment.getVersion();
            } else {
                // Use the version of the existing max segment to generate the new shard spec
                newShardSpec = getNextPartitionShardSpec(max.getShardSpec());
                newVersion = max.getVersion();
            }
            DataSegment publishedSegment = publishSegmentWithShardSpec(segment, newShardSpec, newVersion,
                getPath(segment).getFileSystem(conf), dataSegmentPusher);
            finalSegmentsToPublish.add(publishedSegment);
            timeline.add(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().createChunk(publishedSegment));
        }
        // Publish new segments to metadata storage
        final PreparedBatch batch = handle.prepareBatch(String.format(
            "INSERT INTO %1$s (id, dataSource, created_date, start, \"end\", partitioned, version, used, payload) "
                + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)",
            metadataStorageTablesConfig.getSegmentsTable()));
        for (final DataSegment segment : finalSegmentsToPublish) {
            batch.add(new ImmutableMap.Builder<String, Object>()
                .put("id", segment.getIdentifier())
                .put("dataSource", segment.getDataSource())
                .put("created_date", new DateTime().toString())
                .put("start", segment.getInterval().getStart().toString())
                .put("end", segment.getInterval().getEnd().toString())
                .put("partitioned", (segment.getShardSpec() instanceof NoneShardSpec) ? false : true)
                .put("version", segment.getVersion())
                .put("used", true)
                .put("payload", JSON_MAPPER.writeValueAsBytes(segment))
                .build());
            LOG.info("Published {}", segment.getIdentifier());
        }
        batch.execute();
        return finalSegmentsToPublish;
    });
}
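The helpers getNextPartitionShardSpec and publishSegmentWithShardSpec live in the same class and are not shown here. As a rough illustration only, a next-partition allocation along these lines could look as follows, assuming the existing max segment uses NumberedShardSpec; the actual Hive helper may differ, for example in how it handles other shard spec types.

// Hypothetical sketch, not the actual Hive implementation: derive a shard spec
// for a new chunk appended after the current maximum partition number.
private static ShardSpec nextPartitionShardSpecSketch(ShardSpec max) {
    if (max instanceof NumberedShardSpec) {
        NumberedShardSpec numbered = (NumberedShardSpec) max;
        return new NumberedShardSpec(numbered.getPartitionNum() + 1, numbered.getPartitions());
    }
    // Appending onto other shard spec types would need dedicated handling.
    throw new IllegalStateException("Cannot append to shard spec type " + max.getClass().getName());
}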
Use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
The class ClientInfoResourceTest, method setup.
@Before
public void setup() {
    VersionedIntervalTimeline<String, ServerSelector> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural());
    DruidServer server = new DruidServer("name", "host", 1234, "type", "tier", 0);
    addSegment(timeline, server, "1960-02-13/1961-02-14", ImmutableList.of("d5"), ImmutableList.of("m5"), "v0");
    // segments within [2014-02-13, 2014-02-18]
    addSegment(timeline, server, "2014-02-13/2014-02-14", ImmutableList.of("d1"), ImmutableList.of("m1"), "v0");
    addSegment(timeline, server, "2014-02-14/2014-02-15", ImmutableList.of("d1"), ImmutableList.of("m1"), "v0");
    addSegment(timeline, server, "2014-02-16/2014-02-17", ImmutableList.of("d1"), ImmutableList.of("m1"), "v0");
    addSegment(timeline, server, "2014-02-17/2014-02-18", ImmutableList.of("d2"), ImmutableList.of("m2"), "v0");
    // segments within [2015-02-01, 2015-02-13]
    addSegment(timeline, server, "2015-02-01/2015-02-07", ImmutableList.of("d1"), ImmutableList.of("m1"), "v1");
    addSegment(timeline, server, "2015-02-07/2015-02-13", ImmutableList.of("d1"), ImmutableList.of("m1"), "v1");
    addSegmentWithShardSpec(timeline, server, "2015-02-03/2015-02-05", ImmutableList.of("d1", "d2"), ImmutableList.of("m1", "m2"), "v2", new NumberedShardSpec(0, 2));
    addSegmentWithShardSpec(timeline, server, "2015-02-03/2015-02-05", ImmutableList.of("d1", "d2", "d3"), ImmutableList.of("m1", "m2", "m3"), "v2", new NumberedShardSpec(1, 2));
    addSegment(timeline, server, "2015-02-09/2015-02-10", ImmutableList.of("d1", "d3"), ImmutableList.of("m1", "m3"), "v2");
    addSegment(timeline, server, "2015-02-11/2015-02-12", ImmutableList.of("d3"), ImmutableList.of("m3"), "v2");
    // segments within [2015-03-13, 2015-03-19]
    addSegment(timeline, server, "2015-03-13/2015-03-19", ImmutableList.of("d1"), ImmutableList.of("m1"), "v3");
    addSegment(timeline, server, "2015-03-13/2015-03-14", ImmutableList.of("d1"), ImmutableList.of("m1"), "v4");
    addSegment(timeline, server, "2015-03-14/2015-03-15", ImmutableList.of("d1"), ImmutableList.of("m1"), "v5");
    addSegment(timeline, server, "2015-03-15/2015-03-16", ImmutableList.of("d1"), ImmutableList.of("m1"), "v6");
    // incomplete segment (only partition 0 of 2 is added)
    addSegmentWithShardSpec(timeline, server, "2015-04-03/2015-04-05", ImmutableList.of("d4"), ImmutableList.of("m4"), "v7", new NumberedShardSpec(0, 2));
    serverInventoryView = EasyMock.createMock(FilteredServerInventoryView.class);
    EasyMock.expect(serverInventoryView.getInventory()).andReturn(ImmutableList.of(server)).anyTimes();
    timelineServerView = EasyMock.createMock(TimelineServerView.class);
    EasyMock.expect(timelineServerView.getTimeline(EasyMock.anyObject(TableDataSource.class))).andReturn(timeline);
    EasyMock.replay(serverInventoryView, timelineServerView);
    resource = getResourceTestHelper(serverInventoryView, timelineServerView, new SegmentMetadataQueryConfig());
}
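The addSegment and addSegmentWithShardSpec helpers are defined elsewhere in ClientInfoResourceTest and are not shown here. For intuition on why the last v7 segment is labelled "incomplete": with a NumberedShardSpec, the timeline only exposes an interval once all of its declared partitions are present. A small hedged sketch with plain String payloads (values illustrative, same Druid/Guava/Joda imports as the tests above assumed):

// Hypothetical illustration of the incomplete-partition behaviour, not part of the test class.
private static List<TimelineObjectHolder<String, String>> lookupIncompleteInterval() {
    VersionedIntervalTimeline<String, String> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
    Interval interval = new Interval("2015-04-03/2015-04-05");
    // Only partition 0 of 2 is registered, mirroring the incomplete v7 segment above.
    timeline.add(interval, "v7", new NumberedShardSpec(0, 2).createChunk("partition-0"));
    // With one declared partition missing, the lookup is expected to return no holders;
    // adding new NumberedShardSpec(1, 2).createChunk("partition-1") would make the interval visible.
    return timeline.lookup(interval);
}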
Use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
The class NumberedShardSpecTest, method testVersionedIntervalTimelineBehaviorForNumberedShardSpec.
private void testVersionedIntervalTimelineBehaviorForNumberedShardSpec(List<PartitionChunk<String>> chunks, Set<String> expectedObjects) {
    VersionedIntervalTimeline<String, String> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
    Interval interval = new Interval("2000/3000");
    String version = "v1";
    for (PartitionChunk<String> chunk : chunks) {
        timeline.add(interval, version, chunk);
    }
    Set<String> actualObjects = new HashSet<>();
    List<TimelineObjectHolder<String, String>> entries = timeline.lookup(interval);
    for (TimelineObjectHolder<String, String> entry : entries) {
        for (PartitionChunk<String> chunk : entry.getObject()) {
            actualObjects.add(chunk.getObject());
        }
    }
    Assert.assertEquals(expectedObjects, actualObjects);
}
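A hedged usage sketch of how this helper might be invoked; the chunk payloads and expected set below are illustrative rather than copied from the actual test methods, and Guava's ImmutableList/ImmutableSet are assumed to be imported. With both declared partitions present, both payloads become visible through the timeline.

// Complete partition set: chunks 0 and 1 of 2 are both added, so both payloads are expected.
testVersionedIntervalTimelineBehaviorForNumberedShardSpec(
    ImmutableList.of(
        new NumberedShardSpec(0, 2).createChunk("0"),
        new NumberedShardSpec(1, 2).createChunk("1")),
    ImmutableSet.of("0", "1"));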
Use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
The class DruidCoordinatorCleanupOvershadowed, method run.
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
    CoordinatorStats stats = new CoordinatorStats();
    // Unservice old partitions if we've had enough time to make sure we aren't flapping with old data
    if (params.hasDeletionWaitTimeElapsed()) {
        DruidCluster cluster = params.getDruidCluster();
        Map<String, VersionedIntervalTimeline<String, DataSegment>> timelines = Maps.newHashMap();
        for (MinMaxPriorityQueue<ServerHolder> serverHolders : cluster.getSortedServersByTier()) {
            for (ServerHolder serverHolder : serverHolders) {
                ImmutableDruidServer server = serverHolder.getServer();
                for (ImmutableDruidDataSource dataSource : server.getDataSources()) {
                    VersionedIntervalTimeline<String, DataSegment> timeline = timelines.get(dataSource.getName());
                    if (timeline == null) {
                        timeline = new VersionedIntervalTimeline<>(Comparators.comparable());
                        timelines.put(dataSource.getName(), timeline);
                    }
                    for (DataSegment segment : dataSource.getSegments()) {
                        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
                    }
                }
            }
        }
        // Remove all segments in the DB that are overshadowed by served segments
        for (DataSegment dataSegment : params.getAvailableSegments()) {
            VersionedIntervalTimeline<String, DataSegment> timeline = timelines.get(dataSegment.getDataSource());
            if (timeline != null && timeline.isOvershadowed(dataSegment.getInterval(), dataSegment.getVersion())) {
                coordinator.removeSegment(dataSegment);
                stats.addToGlobalStat("overShadowedCount", 1);
            }
        }
    }
    return params.buildFromExisting().withCoordinatorStats(stats).build();
}
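To make the isOvershadowed check concrete, here is a small hedged sketch with String payloads (values illustrative; io.druid.timeline.partition.SingleElementPartitionChunk is assumed to be imported): an entry queried under an older version is reported as overshadowed once a newer version fully covers its interval, which is exactly the condition under which the coordinator removes the segment above.

// Hypothetical illustration, not part of DruidCoordinatorCleanupOvershadowed.
private static boolean olderVersionIsOvershadowed() {
    VersionedIntervalTimeline<String, String> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
    Interval interval = new Interval("2000/3000");
    // Serve a chunk under version v2 covering the whole interval.
    timeline.add(interval, "v2", new SingleElementPartitionChunk<>("served-v2"));
    // v1 over the same interval is now fully covered by a higher version.
    return timeline.isOvershadowed(interval, "v1"); // expected: true
}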