
Example 26 with SegmentDescriptor

use of io.druid.query.SegmentDescriptor in project druid by druid-io.

the class RealtimePlumber method persistAndMerge.

// Submits persist-n-merge task for a Sink to the mergeExecutor
private void persistAndMerge(final long truncatedTime, final Sink sink) {
    final String threadName = String.format("%s-%s-persist-n-merge", schema.getDataSource(), new DateTime(truncatedTime));
    mergeExecutor.execute(new ThreadRenamingRunnable(threadName) {

        final Interval interval = sink.getInterval();

        Stopwatch mergeStopwatch = null;

        @Override
        public void doRun() {
            try {
                // Bail out if this sink has been abandoned by a previously-executed task.
                if (sinks.get(truncatedTime) != sink) {
                    log.info("Sink[%s] was abandoned, bailing out of persist-n-merge.", sink);
                    return;
                }
                // Use a file to indicate that pushing has completed.
                final File persistDir = computePersistDir(schema, interval);
                final File mergedTarget = new File(persistDir, "merged");
                final File isPushedMarker = new File(persistDir, "isPushedMarker");
                if (!isPushedMarker.exists()) {
                    removeSegment(sink, mergedTarget);
                    if (mergedTarget.exists()) {
                        log.wtf("Merged target[%s] exists?!", mergedTarget);
                        return;
                    }
                } else {
                    log.info("Already pushed sink[%s]", sink);
                    return;
                }
                /*
                 * Note: if the plumber crashes after persisting a subset of hydrants, we might duplicate data, as those
                 * hydrants will be read but the older commitMetadata will be used. Fixing this possibly needs structural
                 * changes to the plumber.
                 */
                for (FireHydrant hydrant : sink) {
                    synchronized (hydrant) {
                        if (!hydrant.hasSwapped()) {
                            log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink);
                            final int rowCount = persistHydrant(hydrant, schema, interval, null);
                            metrics.incrementRowOutputCount(rowCount);
                        }
                    }
                }
                final long mergeThreadCpuTime = VMUtils.safeGetThreadCpuTime();
                mergeStopwatch = Stopwatch.createStarted();
                List<QueryableIndex> indexes = Lists.newArrayList();
                for (FireHydrant fireHydrant : sink) {
                    Segment segment = fireHydrant.getSegment();
                    final QueryableIndex queryableIndex = segment.asQueryableIndex();
                    log.info("Adding hydrant[%s]", fireHydrant);
                    indexes.add(queryableIndex);
                }
                final File mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), mergedTarget, config.getIndexSpec());
                // emit merge metrics before publishing segment
                metrics.incrementMergeCpuTime(VMUtils.safeGetThreadCpuTime() - mergeThreadCpuTime);
                metrics.incrementMergeTimeMillis(mergeStopwatch.elapsed(TimeUnit.MILLISECONDS));
                QueryableIndex index = indexIO.loadIndex(mergedFile);
                log.info("Pushing [%s] to deep storage", sink.getSegment().getIdentifier());
                DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions())));
                log.info("Inserting [%s] to the metadata store", sink.getSegment().getIdentifier());
                segmentPublisher.publishSegment(segment);
                if (!isPushedMarker.createNewFile()) {
                    log.makeAlert("Failed to create marker file for [%s]", schema.getDataSource()).addData("interval", sink.getInterval()).addData("partitionNum", segment.getShardSpec().getPartitionNum()).addData("marker", isPushedMarker).emit();
                }
            } catch (Exception e) {
                metrics.incrementFailedHandoffs();
                log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource()).addData("interval", interval).emit();
                if (shuttingDown) {
                    // We're trying to shut down, and this segment failed to push. Let's just get rid of it.
                    // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
                    cleanShutdown = false;
                    abandonSegment(truncatedTime, sink);
                }
            } finally {
                if (mergeStopwatch != null) {
                    mergeStopwatch.stop();
                }
            }
        }
    });
    handoffNotifier.registerSegmentHandoffCallback(new SegmentDescriptor(sink.getInterval(), sink.getVersion(), config.getShardSpec().getPartitionNum()), mergeExecutor, new Runnable() {

        @Override
        public void run() {
            abandonSegment(sink.getInterval().getStartMillis(), sink);
            metrics.incrementHandOffCount();
        }
    });
}
Also used : Stopwatch(com.google.common.base.Stopwatch) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) Segment(io.druid.segment.Segment) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) QueryableIndex(io.druid.segment.QueryableIndex) SegmentDescriptor(io.druid.query.SegmentDescriptor) ThreadRenamingRunnable(io.druid.common.guava.ThreadRenamingRunnable) List(java.util.List) FireHydrant(io.druid.segment.realtime.FireHydrant) File(java.io.File) Interval(org.joda.time.Interval)
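
For reference, the SegmentDescriptor these examples revolve around is a small value object identifying one segment by interval, version, and partition number. A minimal sketch of constructing and reading one, using only the constructor and accessors exercised in the examples on this page (the literal values are illustrative assumptions):

import io.druid.query.SegmentDescriptor;
import org.joda.time.Interval;

public class SegmentDescriptorSketch {
    public static void main(String[] args) {
        // Identify partition 0 of the segment covering this one-day interval at version "v1".
        Interval interval = Interval.parse("2017-01-01/2017-01-02");
        SegmentDescriptor descriptor = new SegmentDescriptor(interval, "v1", 0);

        System.out.println(descriptor.getInterval());         // interval the segment covers
        System.out.println(descriptor.getVersion());          // version string of the segment
        System.out.println(descriptor.getPartitionNumber());  // partition (shard) number within that interval/version
    }
}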

Example 27 with SegmentDescriptor

use of io.druid.query.SegmentDescriptor in project druid by druid-io.

the class SinkQuerySegmentWalker method getQueryRunnerForSegments.

@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs) {
    // We only handle one particular dataSource. Make sure that's what we have, then ignore from here on out.
    if (!(query.getDataSource() instanceof TableDataSource) || !dataSource.equals(((TableDataSource) query.getDataSource()).getName())) {
        log.makeAlert("Received query for unknown dataSource").addData("dataSource", query.getDataSource()).emit();
        return new NoopQueryRunner<>();
    }
    final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
    if (factory == null) {
        throw new ISE("Unknown query type[%s].", query.getClass());
    }
    final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
    final Function<Query<T>, ServiceMetricEvent.Builder> builderFn = new Function<Query<T>, ServiceMetricEvent.Builder>() {

        @Override
        public ServiceMetricEvent.Builder apply(@Nullable Query<T> input) {
            return toolChest.makeMetricBuilder(query);
        }
    };
    final boolean skipIncrementalSegment = query.getContextValue(CONTEXT_SKIP_INCREMENTAL_SEGMENT, false);
    final AtomicLong cpuTimeAccumulator = new AtomicLong(0L);
    return CPUTimeMetricQueryRunner.safeBuild(toolChest.mergeResults(factory.mergeRunners(queryExecutorService, FunctionalIterable.create(specs).transform(new Function<SegmentDescriptor, QueryRunner<T>>() {

        @Override
        public QueryRunner<T> apply(final SegmentDescriptor descriptor) {
            final PartitionHolder<Sink> holder = sinkTimeline.findEntry(descriptor.getInterval(), descriptor.getVersion());
            if (holder == null) {
                return new ReportTimelineMissingSegmentQueryRunner<>(descriptor);
            }
            final PartitionChunk<Sink> chunk = holder.getChunk(descriptor.getPartitionNumber());
            if (chunk == null) {
                return new ReportTimelineMissingSegmentQueryRunner<>(descriptor);
            }
            final Sink theSink = chunk.getObject();
            final String sinkSegmentIdentifier = theSink.getSegment().getIdentifier();
            return new SpecificSegmentQueryRunner<>(withPerSinkMetrics(new BySegmentQueryRunner<>(sinkSegmentIdentifier, descriptor.getInterval().getStart(), factory.mergeRunners(MoreExecutors.sameThreadExecutor(), Iterables.transform(theSink, new Function<FireHydrant, QueryRunner<T>>() {

                @Override
                public QueryRunner<T> apply(final FireHydrant hydrant) {
                    // Hydrant might swap at any point, but if it's swapped at the start
                    // then we know it's *definitely* swapped.
                    final boolean hydrantDefinitelySwapped = hydrant.hasSwapped();
                    if (skipIncrementalSegment && !hydrantDefinitelySwapped) {
                        return new NoopQueryRunner<>();
                    }
                    // Prevent the underlying segment from swapping while it's being iterated
                    final Pair<Segment, Closeable> segment = hydrant.getAndIncrementSegment();
                    try {
                        QueryRunner<T> baseRunner = QueryRunnerHelper.makeClosingQueryRunner(factory.createRunner(segment.lhs), segment.rhs);
                        // Only cache immutable data: the hydrant must have definitely swapped, and because
                        // hydrants are not the same between replicas, make sure the cache is local.
                        if (hydrantDefinitelySwapped && cache.isLocal()) {
                            return new CachingQueryRunner<>(makeHydrantCacheIdentifier(hydrant), descriptor, objectMapper, cache, toolChest, baseRunner, MoreExecutors.sameThreadExecutor(), cacheConfig);
                        } else {
                            return baseRunner;
                        }
                    } catch (RuntimeException e) {
                        CloseQuietly.close(segment.rhs);
                        throw e;
                    }
                }
            }))), builderFn, sinkSegmentIdentifier, cpuTimeAccumulator), new SpecificSegmentSpec(descriptor));
        }
    }))), builderFn, emitter, cpuTimeAccumulator, true);
}
Also used : Query(io.druid.query.Query) Function(com.google.common.base.Function) ReportTimelineMissingSegmentQueryRunner(io.druid.query.ReportTimelineMissingSegmentQueryRunner) Sink(io.druid.segment.realtime.plumber.Sink) SpecificSegmentQueryRunner(io.druid.query.spec.SpecificSegmentQueryRunner) NoopQueryRunner(io.druid.query.NoopQueryRunner) SegmentDescriptor(io.druid.query.SegmentDescriptor) ISE(io.druid.java.util.common.ISE) FireHydrant(io.druid.segment.realtime.FireHydrant) Pair(io.druid.java.util.common.Pair) BySegmentQueryRunner(io.druid.query.BySegmentQueryRunner) MetricsEmittingQueryRunner(io.druid.query.MetricsEmittingQueryRunner) CachingQueryRunner(io.druid.client.CachingQueryRunner) QueryRunner(io.druid.query.QueryRunner) CPUTimeMetricQueryRunner(io.druid.query.CPUTimeMetricQueryRunner) AtomicLong(java.util.concurrent.atomic.AtomicLong) TableDataSource(io.druid.query.TableDataSource) SpecificSegmentSpec(io.druid.query.spec.SpecificSegmentSpec) ServiceMetricEvent(com.metamx.emitter.service.ServiceMetricEvent) Nullable(javax.annotation.Nullable)
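
Callers drive the walker above with an explicit list of SegmentDescriptors naming the sinks to query. A hedged sketch of such a caller, assuming a walker and a query are already in scope (the interval, version, and partition values are illustrative):

// Sketch only: ask the walker for a runner over two specific partitions of one interval/version.
// `walker` is the SinkQuerySegmentWalker above and `query` is a Query<T> built elsewhere.
List<SegmentDescriptor> specs = ImmutableList.of(
    new SegmentDescriptor(Interval.parse("2017-01-01/2017-01-02"), "v1", 0),
    new SegmentDescriptor(Interval.parse("2017-01-01/2017-01-02"), "v1", 1)
);
QueryRunner<T> runner = walker.getQueryRunnerForSegments(query, specs);
// The returned runner can then be run with the query and a fresh response-context map;
// descriptors that miss the sink timeline surface through ReportTimelineMissingSegmentQueryRunner, as in apply() above.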

Example 28 with SegmentDescriptor

use of io.druid.query.SegmentDescriptor in project druid by druid-io.

the class ServerViewUtil method getTargetLocations.

public static List<LocatedSegmentDescriptor> getTargetLocations(TimelineServerView serverView, DataSource datasource, List<Interval> intervals, int numCandidates) {
    TimelineLookup<String, ServerSelector> timeline = serverView.getTimeline(datasource);
    if (timeline == null) {
        return Collections.emptyList();
    }
    List<LocatedSegmentDescriptor> located = Lists.newArrayList();
    for (Interval interval : intervals) {
        for (TimelineObjectHolder<String, ServerSelector> holder : timeline.lookup(interval)) {
            for (PartitionChunk<ServerSelector> chunk : holder.getObject()) {
                ServerSelector selector = chunk.getObject();
                final SegmentDescriptor descriptor = new SegmentDescriptor(holder.getInterval(), holder.getVersion(), chunk.getChunkNumber());
                long size = selector.getSegment().getSize();
                List<DruidServerMetadata> candidates = selector.getCandidates(numCandidates);
                located.add(new LocatedSegmentDescriptor(descriptor, size, candidates));
            }
        }
    }
    return located;
}
Also used : ServerSelector(io.druid.client.selector.ServerSelector) LocatedSegmentDescriptor(io.druid.query.LocatedSegmentDescriptor) SegmentDescriptor(io.druid.query.SegmentDescriptor) DruidServerMetadata(io.druid.server.coordination.DruidServerMetadata) Interval(org.joda.time.Interval)
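
A hypothetical caller of the utility above, resolving where the segments for one interval are currently served; the serverView variable, the datasource name, and the getLocations() accessor on LocatedSegmentDescriptor are assumptions for illustration:

// Sketch: look up candidate server locations for every segment covering a one-day interval.
List<Interval> intervals = ImmutableList.of(Interval.parse("2017-01-01/2017-01-02"));
List<LocatedSegmentDescriptor> located = ServerViewUtil.getTargetLocations(
    serverView,                        // TimelineServerView, assumed to be in scope
    new TableDataSource("wikipedia"),  // datasource to look up (illustrative name)
    intervals,
    3                                  // return at most three candidate servers per segment
);
for (LocatedSegmentDescriptor segment : located) {
    // Each entry pairs a SegmentDescriptor with its size and the candidate servers holding it.
    System.out.println(segment + " -> " + segment.getLocations());
}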

Example 29 with SegmentDescriptor

use of io.druid.query.SegmentDescriptor in project druid by druid-io.

the class RealtimeIndexTaskTest method testRestoreAfterHandoffAttemptDuringShutdown.

@Test(timeout = 60_000L)
public void testRestoreAfterHandoffAttemptDuringShutdown() throws Exception {
    final TaskStorage taskStorage = new HeapMemoryTaskStorage(new TaskStorageConfig(null));
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final File directory = tempFolder.newFolder();
    final RealtimeIndexTask task1 = makeRealtimeTask(null);
    final DataSegment publishedSegment;
    // First run:
    {
        final TaskToolbox taskToolbox = makeToolbox(task1, taskStorage, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task1.getFirehose() == null) {
            Thread.sleep(50);
        }
        final TestFirehose firehose = (TestFirehose) task1.getFirehose();
        firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"))));
        // Stop the firehose, this will trigger a finishJob.
        firehose.close();
        // Wait for publish.
        while (mdc.getPublished().isEmpty()) {
            Thread.sleep(50);
        }
        publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
        // Do a query.
        Assert.assertEquals(1, sumMetric(task1, "rows"));
        // Trigger graceful shutdown.
        task1.stopGracefully();
        // Wait for the task to finish. The status doesn't really matter.
        while (!statusFuture.isDone()) {
            Thread.sleep(50);
        }
    }
    // Second run:
    {
        final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
        final TaskToolbox taskToolbox = makeToolbox(task2, taskStorage, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task2.getFirehose() == null) {
            Thread.sleep(50);
        }
        // Stop the firehose again, this will start another handoff.
        final TestFirehose firehose = (TestFirehose) task2.getFirehose();
        // Stop the firehose, this will trigger a finishJob.
        firehose.close();
        // publishedSegment is still published. No reason it shouldn't be.
        Assert.assertEquals(ImmutableSet.of(publishedSegment), mdc.getPublished());
        // Wait for a handoffCallback to show up.
        while (handOffCallbacks.isEmpty()) {
            Thread.sleep(50);
        }
        // Simulate handoff.
        for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
            final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
            Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
            executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
        }
        handOffCallbacks.clear();
        // Wait for the task to finish.
        final TaskStatus taskStatus = statusFuture.get();
        Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
    }
}
Also used : TaskStorageConfig(io.druid.indexing.common.config.TaskStorageConfig) HeapMemoryTaskStorage(io.druid.indexing.overlord.HeapMemoryTaskStorage) TaskStatus(io.druid.indexing.common.TaskStatus) DataSegment(io.druid.timeline.DataSegment) TaskToolbox(io.druid.indexing.common.TaskToolbox) Executor(java.util.concurrent.Executor) TaskStorage(io.druid.indexing.overlord.TaskStorage) TestIndexerMetadataStorageCoordinator(io.druid.indexing.test.TestIndexerMetadataStorageCoordinator) SegmentDescriptor(io.druid.query.SegmentDescriptor) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) File(java.io.File) Pair(io.druid.java.util.common.Pair) Test(org.junit.Test)
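
Both this test and testBasics below derive the expected handoff key from the published DataSegment in the same way. A small hypothetical helper (not part of the Druid source) capturing that pattern:

// Hypothetical helper: builds the descriptor the handoff notifier is keyed on,
// exactly as the assertions in these tests do.
static SegmentDescriptor descriptorFor(DataSegment segment) {
    return new SegmentDescriptor(
        segment.getInterval(),                    // the segment's time interval
        segment.getVersion(),                     // the segment's version string
        segment.getShardSpec().getPartitionNum()  // partition number from the shard spec
    );
}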

Example 30 with SegmentDescriptor

use of io.druid.query.SegmentDescriptor in project druid by druid-io.

the class RealtimeIndexTaskTest method testBasics.

@Test(timeout = 60_000L)
public void testBasics() throws Exception {
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final RealtimeIndexTask task = makeRealtimeTask(null);
    final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
    final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
    final DataSegment publishedSegment;
    // Wait for firehose to show up, it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo", "met1", "1")), new MapBasedInputRow(now.minus(new Period("P1D")), ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo", "met1", 2.0)), new MapBasedInputRow(now, ImmutableList.of("dim2"), ImmutableMap.<String, Object>of("dim2", "bar", "met1", 2.0))));
    // Stop the firehose, this will drain out existing events.
    firehose.close();
    // Wait for publish.
    while (mdc.getPublished().isEmpty()) {
        Thread.sleep(50);
    }
    publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
    // Check metrics.
    Assert.assertEquals(2, task.getMetrics().processed());
    Assert.assertEquals(1, task.getMetrics().thrownAway());
    Assert.assertEquals(0, task.getMetrics().unparseable());
    // Do some queries.
    Assert.assertEquals(2, sumMetric(task, "rows"));
    Assert.assertEquals(3, sumMetric(task, "met1"));
    // Simulate handoff.
    for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
        final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
        Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
        executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();
    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
}
Also used : Period(org.joda.time.Period) TaskStatus(io.druid.indexing.common.TaskStatus) DataSegment(io.druid.timeline.DataSegment) TaskToolbox(io.druid.indexing.common.TaskToolbox) Executor(java.util.concurrent.Executor) TestIndexerMetadataStorageCoordinator(io.druid.indexing.test.TestIndexerMetadataStorageCoordinator) SegmentDescriptor(io.druid.query.SegmentDescriptor) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Pair(io.druid.java.util.common.Pair) Test(org.junit.Test)

Aggregations

SegmentDescriptor (io.druid.query.SegmentDescriptor): 48 usages
Test (org.junit.Test): 30 usages
Interval (org.joda.time.Interval): 19 usages
TaskStatus (io.druid.indexing.common.TaskStatus): 17 usages
QueryRunner (io.druid.query.QueryRunner): 12 usages
Map (java.util.Map): 11 usages
Query (io.druid.query.Query): 10 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 8 usages
Pair (io.druid.java.util.common.Pair): 8 usages
DataSegment (io.druid.timeline.DataSegment): 8 usages
Executor (java.util.concurrent.Executor): 8 usages
ImmutableSegmentLoadInfo (io.druid.client.ImmutableSegmentLoadInfo): 7 usages
TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery): 7 usages
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 6 usages
IOException (java.io.IOException): 6 usages
List (java.util.List): 6 usages
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 5 usages
Sequence (io.druid.java.util.common.guava.Sequence): 5 usages
SpecificSegmentQueryRunner (io.druid.query.spec.SpecificSegmentQueryRunner): 5 usages
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 4 usages