use of io.druid.timeline.partition.PartitionChunk in project druid by druid-io.
the class IngestSegmentFirehoseFactory method connect.
@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException {
log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
if (taskToolbox == null) {
// Noop Task is just used to create the toolbox and list segments.
taskToolbox = injector.getInstance(TaskToolboxFactory.class).build(new NoopTask("reingest", 0, 0, null, null, null));
}
try {
final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient().submit(new SegmentListUsedAction(dataSource, interval, null));
final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
for (DataSegment segment : usedSegments) {
timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
}
final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
final List<String> dims;
if (dimensions != null) {
dims = dimensions;
} else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
} else {
Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {
@Override
public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder) {
return Iterables.concat(Iterables.transform(timelineObjectHolder.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {
@Override
public Iterable<String> apply(PartitionChunk<DataSegment> input) {
return input.getObject().getDimensions();
}
}));
}
})));
dims = Lists.newArrayList(Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()));
}
final List<String> metricsList;
if (metrics != null) {
metricsList = metrics;
} else {
Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {
@Override
public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input) {
return Iterables.concat(Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {
@Override
public Iterable<String> apply(PartitionChunk<DataSegment> input) {
return input.getObject().getMetrics();
}
}));
}
})));
metricsList = Lists.newArrayList(metricsSet);
}
final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {
@Override
public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
return Iterables.transform(holder.getObject(), new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {
@Override
public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
final DataSegment segment = input.getObject();
try {
return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier()))), holder.getInterval());
} catch (IOException e) {
throw Throwables.propagate(e);
}
}
});
}
})));
return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
} catch (IOException e) {
throw Throwables.propagate(e);
} catch (SegmentLoadingException e) {
throw Throwables.propagate(e);
}
}
use of io.druid.timeline.partition.PartitionChunk in project druid by druid-io.
the class NumberedShardSpecTest method testPartitionChunks.
@Test
public void testPartitionChunks() {
final List<ShardSpec> specs = ImmutableList.<ShardSpec>of(new NumberedShardSpec(0, 3), new NumberedShardSpec(1, 3), new NumberedShardSpec(2, 3));
final List<PartitionChunk<String>> chunks = Lists.transform(specs, new Function<ShardSpec, PartitionChunk<String>>() {
@Override
public PartitionChunk<String> apply(ShardSpec shardSpec) {
return shardSpec.createChunk("rofl");
}
});
Assert.assertEquals(0, chunks.get(0).getChunkNumber());
Assert.assertEquals(1, chunks.get(1).getChunkNumber());
Assert.assertEquals(2, chunks.get(2).getChunkNumber());
Assert.assertTrue(chunks.get(0).isStart());
Assert.assertFalse(chunks.get(1).isStart());
Assert.assertFalse(chunks.get(2).isStart());
Assert.assertFalse(chunks.get(0).isEnd());
Assert.assertFalse(chunks.get(1).isEnd());
Assert.assertTrue(chunks.get(2).isEnd());
Assert.assertTrue(chunks.get(0).abuts(chunks.get(1)));
Assert.assertTrue(chunks.get(1).abuts(chunks.get(2)));
Assert.assertFalse(chunks.get(0).abuts(chunks.get(0)));
Assert.assertFalse(chunks.get(0).abuts(chunks.get(2)));
Assert.assertFalse(chunks.get(1).abuts(chunks.get(0)));
Assert.assertFalse(chunks.get(1).abuts(chunks.get(1)));
Assert.assertFalse(chunks.get(2).abuts(chunks.get(0)));
Assert.assertFalse(chunks.get(2).abuts(chunks.get(1)));
Assert.assertFalse(chunks.get(2).abuts(chunks.get(2)));
}
use of io.druid.timeline.partition.PartitionChunk in project druid by druid-io.
the class CachingClusteredClient method run.
@Override
public Sequence<T> run(final Query<T> query, final Map<String, Object> responseContext) {
final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
final CacheStrategy<T, Object, Query<T>> strategy = toolChest.getCacheStrategy(query);
final Map<DruidServer, List<SegmentDescriptor>> serverSegments = Maps.newTreeMap();
final List<Pair<Interval, byte[]>> cachedResults = Lists.newArrayList();
final Map<String, CachePopulator> cachePopulatorMap = Maps.newHashMap();
final boolean useCache = CacheUtil.useCacheOnBrokers(query, strategy, cacheConfig);
final boolean populateCache = CacheUtil.populateCacheOnBrokers(query, strategy, cacheConfig);
final boolean isBySegment = BaseQuery.getContextBySegment(query, false);
final ImmutableMap.Builder<String, Object> contextBuilder = new ImmutableMap.Builder<>();
final int priority = BaseQuery.getContextPriority(query, 0);
contextBuilder.put("priority", priority);
if (populateCache) {
// prevent down-stream nodes from caching results as well if we are populating the cache
contextBuilder.put(CacheConfig.POPULATE_CACHE, false);
contextBuilder.put("bySegment", true);
}
TimelineLookup<String, ServerSelector> timeline = serverView.getTimeline(query.getDataSource());
if (timeline == null) {
return Sequences.empty();
}
// build set of segments to query
Set<Pair<ServerSelector, SegmentDescriptor>> segments = Sets.newLinkedHashSet();
List<TimelineObjectHolder<String, ServerSelector>> serversLookup = Lists.newLinkedList();
// Note that enabling this leads to putting uncovered intervals information in the response headers
// and might blow up in some cases https://github.com/druid-io/druid/issues/2108
int uncoveredIntervalsLimit = BaseQuery.getContextUncoveredIntervalsLimit(query, 0);
if (uncoveredIntervalsLimit > 0) {
List<Interval> uncoveredIntervals = Lists.newArrayListWithCapacity(uncoveredIntervalsLimit);
boolean uncoveredIntervalsOverflowed = false;
for (Interval interval : query.getIntervals()) {
Iterable<TimelineObjectHolder<String, ServerSelector>> lookup = timeline.lookup(interval);
long startMillis = interval.getStartMillis();
long endMillis = interval.getEndMillis();
for (TimelineObjectHolder<String, ServerSelector> holder : lookup) {
Interval holderInterval = holder.getInterval();
long intervalStart = holderInterval.getStartMillis();
if (!uncoveredIntervalsOverflowed && startMillis != intervalStart) {
if (uncoveredIntervalsLimit > uncoveredIntervals.size()) {
uncoveredIntervals.add(new Interval(startMillis, intervalStart));
} else {
uncoveredIntervalsOverflowed = true;
}
}
startMillis = holderInterval.getEndMillis();
serversLookup.add(holder);
}
if (!uncoveredIntervalsOverflowed && startMillis < endMillis) {
if (uncoveredIntervalsLimit > uncoveredIntervals.size()) {
uncoveredIntervals.add(new Interval(startMillis, endMillis));
} else {
uncoveredIntervalsOverflowed = true;
}
}
}
if (!uncoveredIntervals.isEmpty()) {
// This returns intervals for which NO segment is present.
// Which is not necessarily an indication that the data doesn't exist or is
// incomplete. The data could exist and just not be loaded yet. In either
// case, though, this query will not include any data from the identified intervals.
responseContext.put("uncoveredIntervals", uncoveredIntervals);
responseContext.put("uncoveredIntervalsOverflowed", uncoveredIntervalsOverflowed);
}
} else {
for (Interval interval : query.getIntervals()) {
Iterables.addAll(serversLookup, timeline.lookup(interval));
}
}
// Let tool chest filter out unneeded segments
final List<TimelineObjectHolder<String, ServerSelector>> filteredServersLookup = toolChest.filterSegments(query, serversLookup);
Map<String, Optional<RangeSet<String>>> dimensionRangeCache = Maps.newHashMap();
// Filter unneeded chunks based on partition dimension
for (TimelineObjectHolder<String, ServerSelector> holder : filteredServersLookup) {
final Set<PartitionChunk<ServerSelector>> filteredChunks = DimFilterUtils.filterShards(query.getFilter(), holder.getObject(), new Function<PartitionChunk<ServerSelector>, ShardSpec>() {
@Override
public ShardSpec apply(PartitionChunk<ServerSelector> input) {
return input.getObject().getSegment().getShardSpec();
}
}, dimensionRangeCache);
for (PartitionChunk<ServerSelector> chunk : filteredChunks) {
ServerSelector selector = chunk.getObject();
final SegmentDescriptor descriptor = new SegmentDescriptor(holder.getInterval(), holder.getVersion(), chunk.getChunkNumber());
segments.add(Pair.of(selector, descriptor));
}
}
final byte[] queryCacheKey;
if (// implies strategy != null
(populateCache || useCache) && // explicit bySegment queries are never cached
!isBySegment) {
queryCacheKey = strategy.computeCacheKey(query);
} else {
queryCacheKey = null;
}
if (query.getContext().get(QueryResource.HDR_IF_NONE_MATCH) != null) {
String prevEtag = (String) query.getContext().get(QueryResource.HDR_IF_NONE_MATCH);
//compute current Etag
Hasher hasher = Hashing.sha1().newHasher();
boolean hasOnlyHistoricalSegments = true;
for (Pair<ServerSelector, SegmentDescriptor> p : segments) {
if (!p.lhs.pick().getServer().isAssignable()) {
hasOnlyHistoricalSegments = false;
break;
}
hasher.putString(p.lhs.getSegment().getIdentifier(), Charsets.UTF_8);
}
if (hasOnlyHistoricalSegments) {
hasher.putBytes(queryCacheKey == null ? strategy.computeCacheKey(query) : queryCacheKey);
String currEtag = Base64.encodeBase64String(hasher.hash().asBytes());
responseContext.put(QueryResource.HDR_ETAG, currEtag);
if (prevEtag.equals(currEtag)) {
return Sequences.empty();
}
}
}
if (queryCacheKey != null) {
// cachKeys map must preserve segment ordering, in order for shards to always be combined in the same order
Map<Pair<ServerSelector, SegmentDescriptor>, Cache.NamedKey> cacheKeys = Maps.newLinkedHashMap();
for (Pair<ServerSelector, SegmentDescriptor> segment : segments) {
final Cache.NamedKey segmentCacheKey = CacheUtil.computeSegmentCacheKey(segment.lhs.getSegment().getIdentifier(), segment.rhs, queryCacheKey);
cacheKeys.put(segment, segmentCacheKey);
}
// Pull cached segments from cache and remove from set of segments to query
final Map<Cache.NamedKey, byte[]> cachedValues;
if (useCache) {
cachedValues = cache.getBulk(Iterables.limit(cacheKeys.values(), cacheConfig.getCacheBulkMergeLimit()));
} else {
cachedValues = ImmutableMap.of();
}
for (Map.Entry<Pair<ServerSelector, SegmentDescriptor>, Cache.NamedKey> entry : cacheKeys.entrySet()) {
Pair<ServerSelector, SegmentDescriptor> segment = entry.getKey();
Cache.NamedKey segmentCacheKey = entry.getValue();
final Interval segmentQueryInterval = segment.rhs.getInterval();
final byte[] cachedValue = cachedValues.get(segmentCacheKey);
if (cachedValue != null) {
// remove cached segment from set of segments to query
segments.remove(segment);
cachedResults.add(Pair.of(segmentQueryInterval, cachedValue));
} else if (populateCache) {
// otherwise, if populating cache, add segment to list of segments to cache
final String segmentIdentifier = segment.lhs.getSegment().getIdentifier();
cachePopulatorMap.put(String.format("%s_%s", segmentIdentifier, segmentQueryInterval), new CachePopulator(cache, objectMapper, segmentCacheKey));
}
}
}
// Compile list of all segments not pulled from cache
for (Pair<ServerSelector, SegmentDescriptor> segment : segments) {
final QueryableDruidServer queryableDruidServer = segment.lhs.pick();
if (queryableDruidServer == null) {
log.makeAlert("No servers found for SegmentDescriptor[%s] for DataSource[%s]?! How can this be?!", segment.rhs, query.getDataSource()).emit();
} else {
final DruidServer server = queryableDruidServer.getServer();
List<SegmentDescriptor> descriptors = serverSegments.get(server);
if (descriptors == null) {
descriptors = Lists.newArrayList();
serverSegments.put(server, descriptors);
}
descriptors.add(segment.rhs);
}
}
return new LazySequence<>(new Supplier<Sequence<T>>() {
@Override
public Sequence<T> get() {
ArrayList<Sequence<T>> sequencesByInterval = Lists.newArrayList();
addSequencesFromCache(sequencesByInterval);
addSequencesFromServer(sequencesByInterval);
return mergeCachedAndUncachedSequences(query, sequencesByInterval);
}
private void addSequencesFromCache(ArrayList<Sequence<T>> listOfSequences) {
if (strategy == null) {
return;
}
final Function<Object, T> pullFromCacheFunction = strategy.pullFromCache();
final TypeReference<Object> cacheObjectClazz = strategy.getCacheObjectClazz();
for (Pair<Interval, byte[]> cachedResultPair : cachedResults) {
final byte[] cachedResult = cachedResultPair.rhs;
Sequence<Object> cachedSequence = new BaseSequence<>(new BaseSequence.IteratorMaker<Object, Iterator<Object>>() {
@Override
public Iterator<Object> make() {
try {
if (cachedResult.length == 0) {
return Iterators.emptyIterator();
}
return objectMapper.readValues(objectMapper.getFactory().createParser(cachedResult), cacheObjectClazz);
} catch (IOException e) {
throw Throwables.propagate(e);
}
}
@Override
public void cleanup(Iterator<Object> iterFromMake) {
}
});
listOfSequences.add(Sequences.map(cachedSequence, pullFromCacheFunction));
}
}
private void addSequencesFromServer(ArrayList<Sequence<T>> listOfSequences) {
listOfSequences.ensureCapacity(listOfSequences.size() + serverSegments.size());
final Query<T> rewrittenQuery = query.withOverriddenContext(contextBuilder.build());
// The data gets handled as a Future and parsed in the long Sequence chain in the resultSeqToAdd setter.
for (Map.Entry<DruidServer, List<SegmentDescriptor>> entry : serverSegments.entrySet()) {
final DruidServer server = entry.getKey();
final List<SegmentDescriptor> descriptors = entry.getValue();
final QueryRunner clientQueryable = serverView.getQueryRunner(server);
if (clientQueryable == null) {
log.error("WTF!? server[%s] doesn't have a client Queryable?", server);
continue;
}
final MultipleSpecificSegmentSpec segmentSpec = new MultipleSpecificSegmentSpec(descriptors);
final Sequence<T> resultSeqToAdd;
if (!server.isAssignable() || !populateCache || isBySegment) {
// Direct server queryable
if (!isBySegment) {
resultSeqToAdd = clientQueryable.run(query.withQuerySegmentSpec(segmentSpec), responseContext);
} else {
// bySegment queries need to be de-serialized, see DirectDruidClient.run()
@SuppressWarnings("unchecked") final Query<Result<BySegmentResultValueClass<T>>> bySegmentQuery = (Query<Result<BySegmentResultValueClass<T>>>) ((Query) query);
@SuppressWarnings("unchecked") final Sequence<Result<BySegmentResultValueClass<T>>> resultSequence = clientQueryable.run(bySegmentQuery.withQuerySegmentSpec(segmentSpec), responseContext);
resultSeqToAdd = (Sequence) Sequences.map(resultSequence, new Function<Result<BySegmentResultValueClass<T>>, Result<BySegmentResultValueClass<T>>>() {
@Override
public Result<BySegmentResultValueClass<T>> apply(Result<BySegmentResultValueClass<T>> input) {
final BySegmentResultValueClass<T> bySegmentValue = input.getValue();
return new Result<>(input.getTimestamp(), new BySegmentResultValueClass<T>(Lists.transform(bySegmentValue.getResults(), toolChest.makePreComputeManipulatorFn(query, MetricManipulatorFns.deserializing())), bySegmentValue.getSegmentId(), bySegmentValue.getInterval()));
}
});
}
} else {
// Requires some manipulation on broker side
@SuppressWarnings("unchecked") final Sequence<Result<BySegmentResultValueClass<T>>> runningSequence = clientQueryable.run(rewrittenQuery.withQuerySegmentSpec(segmentSpec), responseContext);
resultSeqToAdd = new MergeSequence(query.getResultOrdering(), Sequences.<Result<BySegmentResultValueClass<T>>, Sequence<T>>map(runningSequence, new Function<Result<BySegmentResultValueClass<T>>, Sequence<T>>() {
private final Function<T, Object> cacheFn = strategy.prepareForCache();
// Acctually do something with the results
@Override
public Sequence<T> apply(Result<BySegmentResultValueClass<T>> input) {
final BySegmentResultValueClass<T> value = input.getValue();
final CachePopulator cachePopulator = cachePopulatorMap.get(String.format("%s_%s", value.getSegmentId(), value.getInterval()));
final Queue<ListenableFuture<Object>> cacheFutures = new ConcurrentLinkedQueue<>();
return Sequences.<T>withEffect(Sequences.<T, T>map(Sequences.<T, T>map(Sequences.<T>simple(value.getResults()), new Function<T, T>() {
@Override
public T apply(final T input) {
if (cachePopulator != null) {
// only compute cache data if populating cache
cacheFutures.add(backgroundExecutorService.submit(new Callable<Object>() {
@Override
public Object call() {
return cacheFn.apply(input);
}
}));
}
return input;
}
}), toolChest.makePreComputeManipulatorFn(// This casting is sub-optimal, but hasn't caused any major problems yet...
(Query) rewrittenQuery, MetricManipulatorFns.deserializing())), new Runnable() {
@Override
public void run() {
if (cachePopulator != null) {
Futures.addCallback(Futures.allAsList(cacheFutures), new FutureCallback<List<Object>>() {
@Override
public void onSuccess(List<Object> cacheData) {
cachePopulator.populate(cacheData);
// Help out GC by making sure all references are gone
cacheFutures.clear();
}
@Override
public void onFailure(Throwable throwable) {
log.error(throwable, "Background caching failed");
}
}, backgroundExecutorService);
}
}
}, MoreExecutors.sameThreadExecutor());
// End withEffect
}
}));
}
listOfSequences.add(resultSeqToAdd);
}
}
});
}
use of io.druid.timeline.partition.PartitionChunk in project druid by druid-io.
the class ServerManager method getQueryRunnerForSegments.
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(Query<T> query, Iterable<SegmentDescriptor> specs) {
final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
if (factory == null) {
log.makeAlert("Unknown query type, [%s]", query.getClass()).addData("dataSource", query.getDataSource()).emit();
return new NoopQueryRunner<T>();
}
final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
String dataSourceName = getDataSourceName(query.getDataSource());
final VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline = dataSources.get(dataSourceName);
if (timeline == null) {
return new NoopQueryRunner<T>();
}
final Function<Query<T>, ServiceMetricEvent.Builder> builderFn = getBuilderFn(toolChest);
final AtomicLong cpuTimeAccumulator = new AtomicLong(0L);
FunctionalIterable<QueryRunner<T>> queryRunners = FunctionalIterable.create(specs).transformCat(new Function<SegmentDescriptor, Iterable<QueryRunner<T>>>() {
@Override
@SuppressWarnings("unchecked")
public Iterable<QueryRunner<T>> apply(SegmentDescriptor input) {
final PartitionHolder<ReferenceCountingSegment> entry = timeline.findEntry(input.getInterval(), input.getVersion());
if (entry == null) {
return Arrays.<QueryRunner<T>>asList(new ReportTimelineMissingSegmentQueryRunner<T>(input));
}
final PartitionChunk<ReferenceCountingSegment> chunk = entry.getChunk(input.getPartitionNumber());
if (chunk == null) {
return Arrays.<QueryRunner<T>>asList(new ReportTimelineMissingSegmentQueryRunner<T>(input));
}
final ReferenceCountingSegment adapter = chunk.getObject();
return Arrays.asList(buildAndDecorateQueryRunner(factory, toolChest, adapter, input, builderFn, cpuTimeAccumulator));
}
});
return CPUTimeMetricQueryRunner.safeBuild(new FinalizeResultsQueryRunner<>(toolChest.mergeResults(factory.mergeRunners(exec, queryRunners)), toolChest), builderFn, emitter, cpuTimeAccumulator, true);
}
use of io.druid.timeline.partition.PartitionChunk in project druid by druid-io.
the class DatasourcesResource method getSegmentDataSourceSpecificInterval.
/**
* Provides serverView for a datasource and Interval which gives details about servers hosting segments for an interval
* Used by the realtime tasks to fetch a view of the interval they are interested in.
*/
@GET
@Path("/{dataSourceName}/intervals/{interval}/serverview")
@Produces(MediaType.APPLICATION_JSON)
@ResourceFilters(DatasourceResourceFilter.class)
public Response getSegmentDataSourceSpecificInterval(@PathParam("dataSourceName") String dataSourceName, @PathParam("interval") String interval, @QueryParam("partial") final boolean partial) {
TimelineLookup<String, SegmentLoadInfo> timeline = serverInventoryView.getTimeline(new TableDataSource(dataSourceName));
final Interval theInterval = new Interval(interval.replace("_", "/"));
if (timeline == null) {
log.debug("No timeline found for datasource[%s]", dataSourceName);
return Response.ok(Lists.<ImmutableSegmentLoadInfo>newArrayList()).build();
}
Iterable<TimelineObjectHolder<String, SegmentLoadInfo>> lookup = timeline.lookupWithIncompletePartitions(theInterval);
FunctionalIterable<ImmutableSegmentLoadInfo> retval = FunctionalIterable.create(lookup).transformCat(new Function<TimelineObjectHolder<String, SegmentLoadInfo>, Iterable<ImmutableSegmentLoadInfo>>() {
@Override
public Iterable<ImmutableSegmentLoadInfo> apply(TimelineObjectHolder<String, SegmentLoadInfo> input) {
return Iterables.transform(input.getObject(), new Function<PartitionChunk<SegmentLoadInfo>, ImmutableSegmentLoadInfo>() {
@Override
public ImmutableSegmentLoadInfo apply(PartitionChunk<SegmentLoadInfo> chunk) {
return chunk.getObject().toImmutableSegmentLoadInfo();
}
});
}
});
return Response.ok(retval).build();
}
Aggregations