Example 46 with SegmentDescriptor

use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.

the class SpecificSegmentQueryRunnerTest method validate.

private void validate(ObjectMapper mapper, SegmentDescriptor descriptor, ResponseContext responseContext) throws IOException {
    List<SegmentDescriptor> missingSegments = responseContext.getMissingSegments();
    Assert.assertNotNull(missingSegments);
    SegmentDescriptor segmentDesc = missingSegments.get(0);
    SegmentDescriptor newDesc = mapper.readValue(mapper.writeValueAsString(segmentDesc), SegmentDescriptor.class);
    Assert.assertEquals(descriptor, newDesc);
}
Also used : SegmentDescriptor(org.apache.druid.query.SegmentDescriptor)
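
For context, the round trip this test exercises can be sketched on its own. This is a minimal illustration, assuming mapper is the Druid-configured ObjectMapper the test receives (a plain Jackson mapper lacks the Joda-time Interval handling SegmentDescriptor needs); the interval, version, and partition values below are made up:

// Minimal sketch of the SegmentDescriptor JSON round trip. 'mapper' is
// assumed to be a Druid-configured ObjectMapper; values are illustrative.
SegmentDescriptor original = new SegmentDescriptor(
    Interval.parse("2020-01-01/2020-01-02"),  // segment interval
    "v1",                                     // segment version
    0                                         // partition number
);
String json = mapper.writeValueAsString(original);
SegmentDescriptor roundTripped = mapper.readValue(json, SegmentDescriptor.class);
// Equality after the round trip is exactly what validate() asserts.
Assert.assertEquals(original, roundTripped);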

Example 47 with SegmentDescriptor

use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.

the class RealtimePlumber method persistAndMerge.

// Submits persist-n-merge task for a Sink to the mergeExecutor
private void persistAndMerge(final long truncatedTime, final Sink sink) {
    final String threadName = StringUtils.format("%s-%s-persist-n-merge", schema.getDataSource(), DateTimes.utc(truncatedTime));
    mergeExecutor.execute(new ThreadRenamingRunnable(threadName) {

        final Interval interval = sink.getInterval();

        Stopwatch mergeStopwatch = null;

        @Override
        public void doRun() {
            try {
                // Bail out if this sink has been abandoned by a previously-executed task.
                if (sinks.get(truncatedTime) != sink) {
                    log.info("Sink[%s] was abandoned, bailing out of persist-n-merge.", sink);
                    return;
                }
                // Use a file to indicate that pushing has completed.
                final File persistDir = computePersistDir(schema, interval);
                final File mergedTarget = new File(persistDir, "merged");
                final File isPushedMarker = new File(persistDir, "isPushedMarker");
                if (!isPushedMarker.exists()) {
                    removeSegment(sink, mergedTarget);
                    if (mergedTarget.exists()) {
                        log.warn("Merged target[%s] still exists after attempt to delete it; skipping push.", mergedTarget);
                        return;
                    }
                } else {
                    log.info("Already pushed sink[%s]", sink);
                    return;
                }
                /*
            Note: if the plumber crashes after persisting a subset of hydrants, it might duplicate data, as these
            hydrants will be read but the older commitMetadata will be used. Fixing this possibly needs structural
            changes to the plumber.
             */
                for (FireHydrant hydrant : sink) {
                    synchronized (hydrant) {
                        if (!hydrant.hasSwapped()) {
                            log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink);
                            final int rowCount = persistHydrant(hydrant, schema, interval, null);
                            metrics.incrementRowOutputCount(rowCount);
                        }
                    }
                }
                final long mergeThreadCpuTime = JvmUtils.safeGetThreadCpuTime();
                mergeStopwatch = Stopwatch.createStarted();
                final File mergedFile;
                List<QueryableIndex> indexes = new ArrayList<>();
                Closer closer = Closer.create();
                try {
                    for (FireHydrant fireHydrant : sink) {
                        Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
                        final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
                        log.info("Adding hydrant[%s]", fireHydrant);
                        indexes.add(queryableIndex);
                        closer.register(segmentAndCloseable.rhs);
                    }
                    mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), null, mergedTarget, config.getIndexSpec(), config.getIndexSpecForIntermediatePersists(), new BaseProgressIndicator(), config.getSegmentWriteOutMediumFactory(), -1);
                } catch (Throwable t) {
                    throw closer.rethrow(t);
                } finally {
                    closer.close();
                }
                // emit merge metrics before publishing segment
                metrics.incrementMergeCpuTime(JvmUtils.safeGetThreadCpuTime() - mergeThreadCpuTime);
                metrics.incrementMergeTimeMillis(mergeStopwatch.elapsed(TimeUnit.MILLISECONDS));
                log.info("Pushing [%s] to deep storage", sink.getSegment().getId());
                DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())), false);
                log.info("Inserting [%s] to the metadata store", sink.getSegment().getId());
                segmentPublisher.publishSegment(segment);
                if (!isPushedMarker.createNewFile()) {
                    log.makeAlert("Failed to create marker file for [%s]", schema.getDataSource()).addData("interval", sink.getInterval()).addData("partitionNum", segment.getShardSpec().getPartitionNum()).addData("marker", isPushedMarker).emit();
                }
            } catch (Exception e) {
                metrics.incrementFailedHandoffs();
                log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource()).addData("interval", interval).emit();
                if (shuttingDown) {
                    // We're trying to shut down, and this segment failed to push. Let's just get rid of it.
                    // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
                    cleanShutdown = false;
                    abandonSegment(truncatedTime, sink);
                }
            } finally {
                if (mergeStopwatch != null) {
                    mergeStopwatch.stop();
                }
            }
        }
    });
    handoffNotifier.registerSegmentHandoffCallback(new SegmentDescriptor(sink.getInterval(), sink.getVersion(), config.getShardSpec().getPartitionNum()), mergeExecutor, new Runnable() {

        @Override
        public void run() {
            abandonSegment(sink.getInterval().getStartMillis(), sink);
            metrics.incrementHandOffCount();
        }
    });
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) Stopwatch(com.google.common.base.Stopwatch) DataSegment(org.apache.druid.timeline.DataSegment) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) QueryableIndex(org.apache.druid.segment.QueryableIndex) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) ThreadRenamingRunnable(org.apache.druid.common.guava.ThreadRenamingRunnable) List(java.util.List) ArrayList(java.util.ArrayList) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) File(java.io.File) Interval(org.joda.time.Interval) Pair(org.apache.druid.java.util.common.Pair) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator)
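
The merge step above leans on Druid's Closer to release every incremented segment reference even when merging fails. Reduced to a skeleton, with acquireSegment and merge as hypothetical stand-ins for getAndIncrementSegment and mergeQueryableIndex, the pattern is:

// Skeleton of the Closer pattern from the merge step. acquireSegment() and
// merge() are hypothetical stand-ins; the structure is the point: every
// registered Closeable is closed in the finally block, and rethrow()
// propagates the original Throwable rather than masking it.
Closer closer = Closer.create();
try {
    for (FireHydrant hydrant : sink) {
        Pair<ReferenceCountingSegment, Closeable> acquired = acquireSegment(hydrant);
        closer.register(acquired.rhs);  // always release the reference
    }
    merge();
} catch (Throwable t) {
    throw closer.rethrow(t);
} finally {
    closer.close();
}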

Example 48 with SegmentDescriptor

use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.

the class KinesisIndexTaskTest method testRunWithTransformSpec.

@Test(timeout = 120_000L)
public void testRunWithTransformSpec() throws Exception {
    recordSupplier.assign(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(recordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
    recordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
    EasyMock.expectLastCall().anyTimes();
    EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(0, 13)).once();
    recordSupplier.close();
    EasyMock.expectLastCall().once();
    replayAll();
    final KinesisIndexTask task = createTask(null, NEW_DATA_SCHEMA.withTransformSpec(new TransformSpec(new SelectorDimFilter("dim1", "b", null), ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil())))), new KinesisIndexTaskIOConfig(0, "sequence0", new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "0"), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "4")), true, null, null, INPUT_FORMAT, "awsEndpoint", null, null, null, null, false));
    final ListenableFuture<TaskStatus> future = runTask(task);
    // Wait for the task to start reading
    while (task.getRunner().getStatus() != SeekableStreamIndexTaskRunner.Status.READING) {
        Thread.sleep(10);
    }
    // Wait for task to exit
    Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
    verifyAll();
    // Check metrics
    Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getProcessed());
    Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
    Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getThrownAway());
    // Check published metadata
    assertEqualsExceptVersion(ImmutableList.of(sdd("2009/P1D", 0)), publishedDescriptors());
    Assert.assertEquals(new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "4"))), newDataSchemaMetadata());
    // Check segments in deep storage
    final List<SegmentDescriptor> publishedDescriptors = publishedDescriptors();
    Assert.assertEquals(ImmutableList.of("b"), readSegmentColumn("dim1", publishedDescriptors.get(0)));
    Assert.assertEquals(ImmutableList.of("bb"), readSegmentColumn("dim1t", publishedDescriptors.get(0)));
}
Also used : SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) TaskStatus(org.apache.druid.indexer.TaskStatus) TransformSpec(org.apache.druid.segment.transform.TransformSpec) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) Test(org.junit.Test) IndexTaskTest(org.apache.druid.indexing.common.task.IndexTaskTest)
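
The transform spec buried in the one-line createTask call is the heart of this test. Pulled out with the same arguments, just reformatted, it reads as below; the filter keeps only rows where dim1 is "b", which is why the metrics above show one processed row and four thrown away, and the derived dim1t column is why the last assertion expects "bb".

// The TransformSpec from the createTask call above, reformatted for clarity:
// a row filter (keep only dim1 == "b") plus an ingestion-time derived column
// dim1t = concat(dim1, dim1).
TransformSpec transformSpec = new TransformSpec(
    new SelectorDimFilter("dim1", "b", null),
    ImmutableList.of(
        new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil())
    )
);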

Example 49 with SegmentDescriptor

use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.

the class AbstractBatchIndexTask method waitForSegmentAvailability.

/**
 * Wait for segments to become available on the cluster. If waitTimeout is reached, give up on waiting. This is a
 * QoS method that can be used to make batch ingest tasks wait to finish until their ingested data is available on
 * the cluster. Doing so gives an end user assurance that a successful task status means their data is available
 * for querying.
 *
 * @param toolbox {@link TaskToolbox} object for assisting with task work.
 * @param segmentsToWaitFor {@link List} of segments to wait for availability.
 * @param waitTimeout Millis to wait before giving up
 * @return true if all segments became available, otherwise false.
 */
protected boolean waitForSegmentAvailability(TaskToolbox toolbox, List<DataSegment> segmentsToWaitFor, long waitTimeout) {
    if (segmentsToWaitFor.isEmpty()) {
        log.info("Asked to wait for segments to be available, but I wasn't provided with any segments.");
        return true;
    } else if (waitTimeout < 0) {
        log.warn("Asked to wait for availability for < 0 seconds?! Requested waitTimeout: [%s]", waitTimeout);
        return false;
    }
    log.info("Waiting for [%d] segments to be loaded by the cluster...", segmentsToWaitFor.size());
    final long start = System.nanoTime();
    try (SegmentHandoffNotifier notifier = toolbox.getSegmentHandoffNotifierFactory().createSegmentHandoffNotifier(segmentsToWaitFor.get(0).getDataSource())) {
        ExecutorService exec = Execs.directExecutor();
        CountDownLatch doneSignal = new CountDownLatch(segmentsToWaitFor.size());
        notifier.start();
        for (DataSegment s : segmentsToWaitFor) {
            notifier.registerSegmentHandoffCallback(new SegmentDescriptor(s.getInterval(), s.getVersion(), s.getShardSpec().getPartitionNum()), exec, () -> {
                log.debug("Confirmed availability for [%s]. Removing from list of segments to wait for", s.getId());
                doneSignal.countDown();
            });
        }
        segmentAvailabilityConfirmationCompleted = doneSignal.await(waitTimeout, TimeUnit.MILLISECONDS);
        return segmentAvailabilityConfirmationCompleted;
    } catch (InterruptedException e) {
        log.warn("Interrupted while waiting for segment availablity; Unable to confirm availability!");
        Thread.currentThread().interrupt();
        return false;
    } finally {
        segmentAvailabilityWaitTimeMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
        toolbox.getEmitter().emit(new ServiceMetricEvent.Builder().setDimension("dataSource", getDataSource()).setDimension("taskType", getType()).setDimension("taskId", getId()).setDimension("segmentAvailabilityConfirmed", segmentAvailabilityConfirmationCompleted).build("task/segmentAvailability/wait/time", segmentAvailabilityWaitTimeMs));
    }
}
Also used : SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) ExecutorService(java.util.concurrent.ExecutorService) CountDownLatch(java.util.concurrent.CountDownLatch) DataSegment(org.apache.druid.timeline.DataSegment)
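
A concrete task would invoke this after publishing its segments. A hypothetical call site, with an illustrative timeout (real tasks would take it from tuning config) and 'published' standing in for the list of committed segments, might look like:

// Hypothetical call site in a concrete batch task, after 'published' segments
// have been committed to the metadata store. The timeout is illustrative.
long waitTimeoutMillis = TimeUnit.MINUTES.toMillis(30);
boolean allAvailable = waitForSegmentAvailability(toolbox, published, waitTimeoutMillis);
if (!allAvailable) {
    log.warn("Segments were published but not confirmed available within [%d] ms", waitTimeoutMillis);
}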

Example 50 with SegmentDescriptor

use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.

the class AppenderatorDriverRealtimeIndexTaskTest method testNoReportParseExceptions.

@Test(timeout = 60_000L)
public void testNoReportParseExceptions() throws Exception {
    expectPublishedSegments(1);
    final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, null, 1);
    final ListenableFuture<TaskStatus> statusFuture = runTask(task);
    // Wait for the firehose to show up; it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(Arrays.asList(
        // Good row- will be processed.
        ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"),
        // Null row- will be thrown away.
        null,
        // Bad metric- will count as processed, but that particular metric won't update.
        ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "foo"),
        // Bad row- will be unparseable.
        ImmutableMap.of("dim1", "foo", "met1", 2.0, FAIL_DIM, "x"),
        // Good row- will be processed.
        ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)));
    // Stop the firehose, this will drain out existing events.
    firehose.close();
    // Wait for publish.
    Collection<DataSegment> publishedSegments = awaitSegments();
    DataSegment publishedSegment = Iterables.getOnlyElement(publishedSegments);
    // Check metrics.
    Assert.assertEquals(2, task.getRowIngestionMeters().getProcessed());
    Assert.assertEquals(1, task.getRowIngestionMeters().getProcessedWithError());
    Assert.assertEquals(0, task.getRowIngestionMeters().getThrownAway());
    Assert.assertEquals(2, task.getRowIngestionMeters().getUnparseable());
    // Do some queries.
    Assert.assertEquals(3, sumMetric(task, null, "rows").longValue());
    Assert.assertEquals(3, sumMetric(task, null, "met1").longValue());
    awaitHandoffs();
    // Simulate handoff.
    for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
        final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
        Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
        executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();
    Map<String, Object> expectedMetrics = ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, ImmutableMap.of(RowIngestionMeters.PROCESSED, 2, RowIngestionMeters.PROCESSED_WITH_ERROR, 1, RowIngestionMeters.UNPARSEABLE, 2, RowIngestionMeters.THROWN_AWAY, 0));
    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
    IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
    Assert.assertEquals(expectedMetrics, reportData.getRowStats());
}
Also used : IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) Executor(java.util.concurrent.Executor) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Pair(org.apache.druid.java.util.common.Pair) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
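
The "simulate handoff" loop works because the test harness stubs the handoff notifier to record registrations in the handOffCallbacks map instead of polling the coordinator. A stripped-down sketch of that plumbing, with descriptor, exec, and handOffRunnable as hypothetical placeholders for what the stub receives:

// What the stub notifier does on registration (instead of watching the cluster):
Map<SegmentDescriptor, Pair<Executor, Runnable>> handOffCallbacks = new ConcurrentHashMap<>();
handOffCallbacks.put(descriptor, new Pair<>(exec, handOffRunnable));

// "Handoff" is then simulated by draining the map, as in the loop above:
for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
    entry.getValue().lhs.execute(entry.getValue().rhs);
}
handOffCallbacks.clear();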

Aggregations

SegmentDescriptor (org.apache.druid.query.SegmentDescriptor) 71
Test (org.junit.Test) 47
Interval (org.joda.time.Interval) 26
TaskStatus (org.apache.druid.indexer.TaskStatus) 21
DataSegment (org.apache.druid.timeline.DataSegment) 20
Executor (java.util.concurrent.Executor) 19
ArrayList (java.util.ArrayList) 17
Result (org.apache.druid.query.Result) 16
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) 16
List (java.util.List) 15
Query (org.apache.druid.query.Query) 14
QueryRunner (org.apache.druid.query.QueryRunner) 14
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 13
Map (java.util.Map) 13
TimeseriesQuery (org.apache.druid.query.timeseries.TimeseriesQuery) 13
ImmutableMap (com.google.common.collect.ImmutableMap) 12
ListenableFuture (com.google.common.util.concurrent.ListenableFuture) 11
QueryPlus (org.apache.druid.query.QueryPlus) 11
ResponseContext (org.apache.druid.query.context.ResponseContext) 11
ImmutableList (com.google.common.collect.ImmutableList) 10