use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
the class SpecificSegmentQueryRunnerTest method validate.
/**
 * Verifies that the first missing segment recorded in the response context survives a JSON
 * round trip through {@code mapper} and equals the expected descriptor.
 *
 * @param mapper          object mapper used to serialize and deserialize the descriptor
 * @param descriptor      the descriptor the round-tripped missing segment must equal
 * @param responseContext response context whose missing-segment list is inspected
 * @throws IOException if JSON serialization or deserialization fails
 */
private void validate(ObjectMapper mapper, SegmentDescriptor descriptor, ResponseContext responseContext) throws IOException {
  final List<SegmentDescriptor> missingSegments = responseContext.getMissingSegments();
  // Fail with a readable assertion message instead of a bare "AssertionError" or an
  // IndexOutOfBoundsException from get(0) when nothing was reported as missing.
  Assert.assertNotNull("response context has no missing-segments entry", missingSegments);
  Assert.assertFalse("response context reported an empty missing-segments list", missingSegments.isEmpty());
  final SegmentDescriptor segmentDesc = missingSegments.get(0);
  final SegmentDescriptor newDesc = mapper.readValue(mapper.writeValueAsString(segmentDesc), SegmentDescriptor.class);
  Assert.assertEquals(descriptor, newDesc);
}
use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
the class RealtimePlumber method persistAndMerge.
/**
 * Submits a persist-n-merge task for a Sink to the mergeExecutor, then registers a handoff callback for it.
 *
 * The submitted task persists any hydrants of the sink that have not been swapped yet, merges all hydrants
 * into a single index under the sink's persist directory, pushes the merged file through dataSegmentPusher,
 * publishes the resulting segment through segmentPublisher, and finally creates an "isPushedMarker" file so
 * a later run can tell the push already happened.
 *
 * The handoff callback is registered by this method itself (not by the submitted task); when invoked on the
 * mergeExecutor it abandons the segment and increments the hand-off counter.
 *
 * @param truncatedTime key under which the sink is looked up in {@code sinks}; also used for the thread name
 * @param sink          the sink to persist, merge and push
 */
private void persistAndMerge(final long truncatedTime, final Sink sink) {
// The worker thread is renamed to "<dataSource>-<UTC time>-persist-n-merge" while the task runs.
final String threadName = StringUtils.format("%s-%s-persist-n-merge", schema.getDataSource(), DateTimes.utc(truncatedTime));
mergeExecutor.execute(new ThreadRenamingRunnable(threadName) {
final Interval interval = sink.getInterval();
// Stays null until merging actually starts, so the finally block only stops a stopwatch that was started.
Stopwatch mergeStopwatch = null;
@Override
public void doRun() {
try {
// Bail out if this sink has been abandoned by a previously-executed task.
if (sinks.get(truncatedTime) != sink) {
log.info("Sink[%s] was abandoned, bailing out of persist-n-merge.", sink);
return;
}
// Use a file to indicate that pushing has completed.
final File persistDir = computePersistDir(schema, interval);
final File mergedTarget = new File(persistDir, "merged");
final File isPushedMarker = new File(persistDir, "isPushedMarker");
if (!isPushedMarker.exists()) {
// Not pushed yet: clear any previous merge output first, and give up if it cannot be removed.
removeSegment(sink, mergedTarget);
if (mergedTarget.exists()) {
log.warn("Merged target[%s] still exists after attempt to delete it; skipping push.", mergedTarget);
return;
}
} else {
// Marker present: an earlier run already pushed this sink, nothing more to do.
log.info("Already pushed sink[%s]", sink);
return;
}
/*
Note: if the plumber crashes after persisting a subset of hydrants then it might duplicate data, as these
hydrants will be read but older commitMetadata will be used. Fixing this possibly needs structural
changes to the plumber.
*/
// Persist every hydrant that has not been swapped yet; each check-and-persist runs under the hydrant's monitor.
for (FireHydrant hydrant : sink) {
synchronized (hydrant) {
if (!hydrant.hasSwapped()) {
log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink);
final int rowCount = persistHydrant(hydrant, schema, interval, null);
metrics.incrementRowOutputCount(rowCount);
}
}
}
// Capture the CPU-time baseline and start the wall-clock stopwatch for the merge metrics emitted below.
final long mergeThreadCpuTime = JvmUtils.safeGetThreadCpuTime();
mergeStopwatch = Stopwatch.createStarted();
final File mergedFile;
List<QueryableIndex> indexes = new ArrayList<>();
// The closer collects the Closeable returned with each hydrant's segment; all are closed in the finally below.
Closer closer = Closer.create();
try {
for (FireHydrant fireHydrant : sink) {
Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
log.info("Adding hydrant[%s]", fireHydrant);
indexes.add(queryableIndex);
closer.register(segmentAndCloseable.rhs);
}
// NOTE(review): the meaning of the null and trailing -1 arguments is not visible from this method - confirm against IndexMerger.
mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), null, mergedTarget, config.getIndexSpec(), config.getIndexSpecForIntermediatePersists(), new BaseProgressIndicator(), config.getSegmentWriteOutMediumFactory(), -1);
} catch (Throwable t) {
throw closer.rethrow(t);
} finally {
closer.close();
}
// emit merge metrics before publishing segment
metrics.incrementMergeCpuTime(JvmUtils.safeGetThreadCpuTime() - mergeThreadCpuTime);
metrics.incrementMergeTimeMillis(mergeStopwatch.elapsed(TimeUnit.MILLISECONDS));
log.info("Pushing [%s] to deep storage", sink.getSegment().getId());
// The pushed segment carries the dimensions merged from the hydrant indexes and the schema's dimensions spec.
DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())), false);
log.info("Inserting [%s] to the metadata store", sink.getSegment().getId());
segmentPublisher.publishSegment(segment);
// Record that the push completed; a failure to create the marker only raises an alert, it does not fail the task.
if (!isPushedMarker.createNewFile()) {
log.makeAlert("Failed to create marker file for [%s]", schema.getDataSource()).addData("interval", sink.getInterval()).addData("partitionNum", segment.getShardSpec().getPartitionNum()).addData("marker", isPushedMarker).emit();
}
} catch (Exception e) {
// Any failure above counts as a failed handoff and raises an alert.
metrics.incrementFailedHandoffs();
log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource()).addData("interval", interval).emit();
if (shuttingDown) {
// We're trying to shut down, and this segment failed to push. Let's just get rid of it.
// This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
cleanShutdown = false;
abandonSegment(truncatedTime, sink);
}
} finally {
if (mergeStopwatch != null) {
mergeStopwatch.stop();
}
}
}
});
// Registered from the calling thread right after the task is submitted. The descriptor uses the sink's interval
// and version together with the partition number from the config's shard spec; the callback runs on mergeExecutor.
handoffNotifier.registerSegmentHandoffCallback(new SegmentDescriptor(sink.getInterval(), sink.getVersion(), config.getShardSpec().getPartitionNum()), mergeExecutor, new Runnable() {
@Override
public void run() {
abandonSegment(sink.getInterval().getStartMillis(), sink);
metrics.incrementHandOffCount();
}
});
}
use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
the class KinesisIndexTaskTest method testRunWithTransformSpec.
/**
 * Runs a Kinesis index task whose schema carries a transform spec (a selector filter on "dim1" == "b"
 * plus an expression transform producing "dim1t") and checks the row counters, the published metadata
 * and the column values stored in the published segment.
 */
@Test(timeout = 120_000L)
public void testRunWithTransformSpec() throws Exception {
  // Record-supplier expectations: any assignment/seek is allowed, one poll yields the records, one close.
  recordSupplier.assign(EasyMock.anyObject());
  EasyMock.expectLastCall().anyTimes();
  EasyMock.expect(recordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
  recordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
  EasyMock.expectLastCall().anyTimes();
  EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(0, 13)).once();
  recordSupplier.close();
  EasyMock.expectLastCall().once();
  replayAll();

  // Transform spec: keep only rows with dim1 == "b" and derive dim1t = concat(dim1, dim1).
  final SelectorDimFilter onlyDim1B = new SelectorDimFilter("dim1", "b", null);
  final ExpressionTransform doubledDim1 = new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil());
  final TransformSpec transformSpec = new TransformSpec(onlyDim1B, ImmutableList.of(doubledDim1));

  // IO config: read shard SHARD_ID1 of STREAM from sequence number "0" to "4".
  final KinesisIndexTaskIOConfig ioConfig = new KinesisIndexTaskIOConfig(
      0,
      "sequence0",
      new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "0"), ImmutableSet.of()),
      new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "4")),
      true,
      null,
      null,
      INPUT_FORMAT,
      "awsEndpoint",
      null,
      null,
      null,
      null,
      false
  );

  final KinesisIndexTask task = createTask(null, NEW_DATA_SCHEMA.withTransformSpec(transformSpec), ioConfig);
  final ListenableFuture<TaskStatus> taskFuture = runTask(task);

  // Poll until the runner reports that it has started reading.
  while (task.getRunner().getStatus() != SeekableStreamIndexTaskRunner.Status.READING) {
    Thread.sleep(10);
  }

  // Block until the task exits and require a successful status.
  Assert.assertEquals(TaskState.SUCCESS, taskFuture.get().getStatusCode());
  verifyAll();

  // Row counters: one row processed, none unparseable, four thrown away.
  Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getProcessed());
  Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
  Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getThrownAway());

  // Published metadata: a single 2009 day segment and end offsets at sequence number "4".
  assertEqualsExceptVersion(ImmutableList.of(sdd("2009/P1D", 0)), publishedDescriptors());
  Assert.assertEquals(
      new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "4"))),
      newDataSchemaMetadata()
  );

  // Segment contents in deep storage: the original and the transformed column of the first descriptor.
  final List<SegmentDescriptor> descriptors = publishedDescriptors();
  Assert.assertEquals(ImmutableList.of("b"), readSegmentColumn("dim1", descriptors.get(0)));
  Assert.assertEquals(ImmutableList.of("bb"), readSegmentColumn("dim1t", descriptors.get(0)));
}
use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
the class AbstractBatchIndexTask method waitForSegmentAvailability.
/**
 * Blocks until every given segment has been reported as handed off, or until {@code waitTimeout}
 * milliseconds have elapsed, whichever comes first. Batch ingestion tasks can use this as a QoS step so
 * that a successful task status also means the ingested data is available for querying.
 *
 * <p>Returns immediately with {@code true} when there is nothing to wait for, and with {@code false}
 * when the timeout is negative. Otherwise a handoff callback is registered per segment, and the outcome
 * and elapsed wait time are stored in {@code segmentAvailabilityConfirmationCompleted} and
 * {@code segmentAvailabilityWaitTimeMs} and emitted as the {@code task/segmentAvailability/wait/time}
 * metric. If the waiting thread is interrupted, its interrupt flag is restored and {@code false} is
 * returned.
 *
 * @param toolbox {@link TaskToolbox} supplying the handoff notifier factory and the metric emitter.
 * @param segmentsToWaitFor {@link List} of segments whose availability should be confirmed.
 * @param waitTimeout maximum time to wait, in milliseconds.
 * @return True if all segments became available within the timeout, otherwise False.
 */
protected boolean waitForSegmentAvailability(TaskToolbox toolbox, List<DataSegment> segmentsToWaitFor, long waitTimeout) {
  if (segmentsToWaitFor.isEmpty()) {
    log.info("Asked to wait for segments to be available, but I wasn't provided with any segments.");
    return true;
  }
  if (waitTimeout < 0) {
    log.warn("Asked to wait for availability for < 0 seconds?! Requested waitTimeout: [%s]", waitTimeout);
    return false;
  }

  log.info("Waiting for [%d] segments to be loaded by the cluster...", segmentsToWaitFor.size());
  final long startNanos = System.nanoTime();
  try (
      SegmentHandoffNotifier handoffNotifier = toolbox
          .getSegmentHandoffNotifierFactory()
          .createSegmentHandoffNotifier(segmentsToWaitFor.get(0).getDataSource())
  ) {
    ExecutorService callbackExecutor = Execs.directExecutor();
    // One count per segment; each handoff callback releases one.
    CountDownLatch remaining = new CountDownLatch(segmentsToWaitFor.size());
    handoffNotifier.start();
    for (DataSegment segment : segmentsToWaitFor) {
      SegmentDescriptor descriptor = new SegmentDescriptor(
          segment.getInterval(),
          segment.getVersion(),
          segment.getShardSpec().getPartitionNum()
      );
      handoffNotifier.registerSegmentHandoffCallback(descriptor, callbackExecutor, () -> {
        log.debug("Confirmed availability for [%s]. Removing from list of segments to wait for", segment.getId());
        remaining.countDown();
      });
    }
    segmentAvailabilityConfirmationCompleted = remaining.await(waitTimeout, TimeUnit.MILLISECONDS);
    return segmentAvailabilityConfirmationCompleted;
  } catch (InterruptedException e) {
    log.warn("Interrupted while waiting for segment availablity; Unable to confirm availability!");
    // Restore the interrupt flag for callers further up the stack.
    Thread.currentThread().interrupt();
    return false;
  } finally {
    // Runs on every exit path of the try, so the wait time is always recorded and emitted.
    segmentAvailabilityWaitTimeMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    toolbox.getEmitter().emit(
        new ServiceMetricEvent.Builder()
            .setDimension("dataSource", getDataSource())
            .setDimension("taskType", getType())
            .setDimension("taskId", getId())
            .setDimension("segmentAvailabilityConfirmed", segmentAvailabilityConfirmationCompleted)
            .build("task/segmentAvailability/wait/time", segmentAvailabilityWaitTimeMs)
    );
  }
}
use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
the class AppenderatorDriverRealtimeIndexTaskTest method testNoReportParseExceptions.
/**
 * Feeds a mix of good, null and malformed rows to a realtime task created via
 * {@code makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, null, 1)} and checks that the task
 * still succeeds, with the row counters and the task report reflecting the bad rows.
 */
@Test(timeout = 60_000L)
public void testNoReportParseExceptions() throws Exception {
  expectPublishedSegments(1);
  final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, null, 1);
  final ListenableFuture<TaskStatus> taskFuture = runTask(task);

  // The firehose is null until the task has started; poll until it appears.
  while (task.getFirehose() == null) {
    Thread.sleep(50);
  }
  final TestFirehose testFirehose = (TestFirehose) task.getFirehose();

  testFirehose.addRows(
      Arrays.asList(
          // Good row - will be processed.
          ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"),
          // Null row - the assertions below expect nothing thrown away and two unparseable rows.
          null,
          // Bad metric - will count as processed, but that particular metric won't update.
          ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "foo"),
          // Bad row - will be unparseable.
          ImmutableMap.of("dim1", "foo", "met1", 2.0, FAIL_DIM, "x"),
          // Good row - will be processed.
          ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)
      )
  );

  // Closing the firehose drains the events that were already added.
  testFirehose.close();

  // Wait for the publish; exactly one segment is expected.
  Collection<DataSegment> segments = awaitSegments();
  DataSegment onlySegment = Iterables.getOnlyElement(segments);

  // Row counters.
  Assert.assertEquals(2, task.getRowIngestionMeters().getProcessed());
  Assert.assertEquals(1, task.getRowIngestionMeters().getProcessedWithError());
  Assert.assertEquals(0, task.getRowIngestionMeters().getThrownAway());
  Assert.assertEquals(2, task.getRowIngestionMeters().getUnparseable());

  // Query the ingested data.
  Assert.assertEquals(3, sumMetric(task, null, "rows").longValue());
  Assert.assertEquals(3, sumMetric(task, null, "met1").longValue());

  awaitHandoffs();

  // Simulate handoff: every registered callback must target the published segment, then gets executed.
  for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> callbackEntry : handOffCallbacks.entrySet()) {
    final Pair<Executor, Runnable> callback = callbackEntry.getValue();
    final SegmentDescriptor expectedDescriptor = new SegmentDescriptor(
        onlySegment.getInterval(),
        onlySegment.getVersion(),
        onlySegment.getShardSpec().getPartitionNum()
    );
    Assert.assertEquals(expectedDescriptor, callbackEntry.getKey());
    callback.lhs.execute(callback.rhs);
  }
  handOffCallbacks.clear();

  Map<String, Object> expectedRowStats = ImmutableMap.of(
      RowIngestionMeters.BUILD_SEGMENTS,
      ImmutableMap.of(
          RowIngestionMeters.PROCESSED, 2,
          RowIngestionMeters.PROCESSED_WITH_ERROR, 1,
          RowIngestionMeters.UNPARSEABLE, 2,
          RowIngestionMeters.THROWN_AWAY, 0
      )
  );

  // Wait for the task to finish and compare the reported row stats.
  final TaskStatus finalStatus = taskFuture.get();
  Assert.assertEquals(TaskState.SUCCESS, finalStatus.getStatusCode());
  IngestionStatsAndErrorsTaskReportData report = getTaskReportData();
  Assert.assertEquals(expectedRowStats, report.getRowStats());
}
Aggregations