use of org.apache.druid.segment.indexing.RealtimeIOConfig in project druid by druid-io.
the class FireDepartmentTest method testSerde.
@Test
public void testSerde() throws Exception {
ObjectMapper jsonMapper = new DefaultObjectMapper();
jsonMapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, jsonMapper));
FireDepartment schema = new FireDepartment(new DataSchema("foo", jsonMapper.convertValue(new StringInputRowParser(new JSONParseSpec(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2"))), null, null, null), null), Map.class), new AggregatorFactory[] { new CountAggregatorFactory("count") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null), null, jsonMapper), new RealtimeIOConfig(null, new RealtimePlumberSchool(null, null, null, null, null, null, null, NoopJoinableFactory.INSTANCE, TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance()), TestHelper.getTestIndexIO(), MapCache.create(0), NO_CACHE_CONFIG, new CachePopulatorStats(), TestHelper.makeJsonMapper())), RealtimeTuningConfig.makeDefaultTuningConfig(new File("/tmp/nonexistent")));
String json = jsonMapper.writeValueAsString(schema);
FireDepartment newSchema = jsonMapper.readValue(json, FireDepartment.class);
Assert.assertEquals(schema.getDataSchema().getDataSource(), newSchema.getDataSchema().getDataSource());
Assert.assertEquals("/tmp/nonexistent", schema.getTuningConfig().getBasePersistDirectory().toString());
}
use of org.apache.druid.segment.indexing.RealtimeIOConfig in project druid by druid-io.
the class RealtimeIndexTask method run.
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
runThread = Thread.currentThread();
if (this.plumber != null) {
throw new IllegalStateException("Plumber must be null");
}
setupTimeoutAlert();
boolean normalExit = true;
// It would be nice to get the PlumberSchool in the constructor. Although that will need jackson injectables for
// stuff like the ServerView, which seems kind of odd? Perhaps revisit this when Guice has been introduced.
final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(toolbox);
// NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
// with the coordinator. Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
// which will typically be either the main data processing loop or the persist thread.
// Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
final long lockTimeoutMs = getContextValue(Tasks.LOCK_TIMEOUT_KEY, Tasks.DEFAULT_LOCK_TIMEOUT_MILLIS);
// Note: if lockTimeoutMs is larger than ServerConfig.maxIdleTime, http timeout error can occur while waiting for a
// lock to be acquired.
final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {
@Override
public void announceSegment(final DataSegment segment) throws IOException {
// Side effect: Calling announceSegment causes a lock to be acquired
final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", segment.getInterval());
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
}
toolbox.getSegmentAnnouncer().announceSegment(segment);
}
@Override
public void unannounceSegment(final DataSegment segment) throws IOException {
try {
toolbox.getSegmentAnnouncer().unannounceSegment(segment);
} finally {
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
@Override
public void announceSegments(Iterable<DataSegment> segments) throws IOException {
// Side effect: Calling announceSegments causes locks to be acquired
for (DataSegment segment : segments) {
final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", segment.getInterval());
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
}
}
toolbox.getSegmentAnnouncer().announceSegments(segments);
}
@Override
public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
try {
toolbox.getSegmentAnnouncer().unannounceSegments(segments);
} finally {
for (DataSegment segment : segments) {
toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
}
}
}
};
// NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
// NOTE: (and thus the firehose)
// Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
// realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
// the plumber such that waiting for the coordinator doesn't block data processing.
final VersioningPolicy versioningPolicy = new VersioningPolicy() {
@Override
public String getVersion(final Interval interval) {
try {
// Side effect: Calling getVersion causes a lock to be acquired
final TimeChunkLockAcquireAction action = new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, interval, lockTimeoutMs);
final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(action), "Cannot acquire a lock for interval[%s]", interval);
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
}
return lock.getVersion();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
DataSchema dataSchema = spec.getDataSchema();
RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
RealtimeTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir()).withVersioningPolicy(versioningPolicy);
final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
this.metrics = fireDepartment.getMetrics();
final RealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartment);
this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
// NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
// NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
// NOTE: (partitionNum_index.zip for HDFS data storage) to mismatch, or it can cause historical nodes to load
// NOTE: different instances of the "same" segment.
final PlumberSchool plumberSchool = new RealtimePlumberSchool(toolbox.getEmitter(), toolbox.getQueryRunnerFactoryConglomerate(), toolbox.getSegmentPusher(), lockingSegmentAnnouncer, segmentPublisher, toolbox.getSegmentHandoffNotifierFactory(), toolbox.getQueryProcessingPool(), toolbox.getJoinableFactory(), toolbox.getIndexMergerV9(), toolbox.getIndexIO(), toolbox.getCache(), toolbox.getCacheConfig(), toolbox.getCachePopulatorStats(), toolbox.getJsonMapper());
this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
final Supplier<Committer> committerSupplier = Committers.nilSupplier();
LookupNodeService lookupNodeService = getContextValue(CTX_KEY_LOOKUP_TIER) == null ? toolbox.getLookupNodeService() : new LookupNodeService((String) getContextValue(CTX_KEY_LOOKUP_TIER));
DiscoveryDruidNode discoveryDruidNode = new DiscoveryDruidNode(toolbox.getDruidNode(), NodeRole.PEON, ImmutableMap.of(toolbox.getDataNodeService().getName(), toolbox.getDataNodeService(), lookupNodeService.getName(), lookupNodeService));
try {
toolbox.getDataSegmentServerAnnouncer().announce();
toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
plumber.startJob();
// Set up metrics emission
toolbox.addMonitor(metricsMonitor);
// Delay firehose connection to avoid claiming input resources while the plumber is starting up.
final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
// Skip connecting firehose if we've been stopped before we got started.
synchronized (this) {
if (!gracefullyStopped) {
firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
}
}
// Time to read data!
while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
}
} catch (Throwable e) {
normalExit = false;
log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
throw e;
} finally {
if (normalExit) {
try {
// Persist if we had actually started.
if (firehose != null) {
log.info("Persisting remaining data.");
final Committer committer = committerSupplier.get();
final CountDownLatch persistLatch = new CountDownLatch(1);
plumber.persist(new Committer() {
@Override
public Object getMetadata() {
return committer.getMetadata();
}
@Override
public void run() {
try {
committer.run();
} finally {
persistLatch.countDown();
}
}
});
persistLatch.await();
}
if (gracefullyStopped) {
log.info("Gracefully stopping.");
} else {
log.info("Finishing the job.");
synchronized (this) {
if (gracefullyStopped) {
// Someone called stopGracefully after we checked the flag. That's okay, just stop now.
log.info("Gracefully stopping.");
} else {
finishingJob = true;
}
}
if (finishingJob) {
plumber.finishJob();
}
}
} catch (InterruptedException e) {
log.debug(e, "Interrupted while finishing the job");
} catch (Exception e) {
log.makeAlert(e, "Failed to finish realtime task").emit();
throw e;
} finally {
if (firehose != null) {
CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
}
toolbox.removeMonitor(metricsMonitor);
}
}
toolbox.getDataSegmentServerAnnouncer().unannounce();
toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
}
log.info("Job done!");
return TaskStatus.success(getId());
}
use of org.apache.druid.segment.indexing.RealtimeIOConfig in project druid by druid-io.
the class PartialSegmentGenerateTask method generateSegments.
private List<DataSegment> generateSegments(final TaskToolbox toolbox, final ParallelIndexSupervisorTaskClient taskClient, final InputSource inputSource, final File tmpDir) throws IOException, InterruptedException, ExecutionException, TimeoutException {
final DataSchema dataSchema = ingestionSchema.getDataSchema();
final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
final FireDepartmentMetrics fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
final RowIngestionMeters buildSegmentsMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
toolbox.addMonitor(new RealtimeMetricsMonitor(Collections.singletonList(fireDepartmentForMetrics), Collections.singletonMap(DruidMetrics.TASK_ID, new String[] { getId() })));
final ParallelIndexTuningConfig tuningConfig = ingestionSchema.getTuningConfig();
final PartitionsSpec partitionsSpec = tuningConfig.getGivenOrDefaultPartitionsSpec();
final long pushTimeout = tuningConfig.getPushTimeout();
final SegmentAllocatorForBatch segmentAllocator = createSegmentAllocator(toolbox, taskClient);
final SequenceNameFunction sequenceNameFunction = segmentAllocator.getSequenceNameFunction();
final ParseExceptionHandler parseExceptionHandler = new ParseExceptionHandler(buildSegmentsMeters, tuningConfig.isLogParseExceptions(), tuningConfig.getMaxParseExceptions(), tuningConfig.getMaxSavedParseExceptions());
final boolean useMaxMemoryEstimates = getContextValue(Tasks.USE_MAX_MEMORY_ESTIMATES, Tasks.DEFAULT_USE_MAX_MEMORY_ESTIMATES);
final Appenderator appenderator = BatchAppenderators.newAppenderator(getId(), toolbox.getAppenderatorsManager(), fireDepartmentMetrics, toolbox, dataSchema, tuningConfig, new ShuffleDataSegmentPusher(supervisorTaskId, getId(), toolbox.getIntermediaryDataManager()), buildSegmentsMeters, parseExceptionHandler, useMaxMemoryEstimates);
boolean exceptionOccurred = false;
try (final BatchAppenderatorDriver driver = BatchAppenderators.newDriver(appenderator, toolbox, segmentAllocator)) {
driver.startJob();
final SegmentsAndCommitMetadata pushed = InputSourceProcessor.process(dataSchema, driver, partitionsSpec, inputSource, inputSource.needsFormat() ? ParallelIndexSupervisorTask.getInputFormat(ingestionSchema) : null, tmpDir, sequenceNameFunction, inputRowIteratorBuilder, buildSegmentsMeters, parseExceptionHandler, pushTimeout);
return pushed.getSegments();
} catch (Exception e) {
exceptionOccurred = true;
throw e;
} finally {
if (exceptionOccurred) {
appenderator.closeNow();
} else {
appenderator.close();
}
}
}
use of org.apache.druid.segment.indexing.RealtimeIOConfig in project druid by druid-io.
the class AppenderatorDriverRealtimeIndexTaskTest method makeRealtimeTask.
private AppenderatorDriverRealtimeIndexTask makeRealtimeTask(final String taskId, final TransformSpec transformSpec, final boolean reportParseExceptions, final long handoffTimeout, final Boolean logParseExceptions, final Integer maxParseExceptions, final Integer maxSavedParseExceptions, final Integer maxRowsPerSegment, final Long maxTotalRows) {
DataSchema dataSchema = new DataSchema("test_ds", TestHelper.makeJsonMapper().convertValue(new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec("t", "auto", null), new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("dim1"), new StringDimensionSchema("dim2"), new StringDimensionSchema("dim1t"), new LongDimensionSchema("dimLong"), new FloatDimensionSchema("dimFloat"))))), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT), new AggregatorFactory[] { new CountAggregatorFactory("rows"), new LongSumAggregatorFactory("met1", "met1") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), transformSpec, OBJECT_MAPPER);
RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(new TestFirehoseFactory(), null);
RealtimeAppenderatorTuningConfig tuningConfig = new RealtimeAppenderatorTuningConfig(null, 1000, null, null, maxRowsPerSegment, maxTotalRows, null, null, null, null, null, null, reportParseExceptions, handoffTimeout, null, null, logParseExceptions, maxParseExceptions, maxSavedParseExceptions);
return new AppenderatorDriverRealtimeIndexTask(taskId, null, new RealtimeAppenderatorIngestionSpec(dataSchema, realtimeIOConfig, tuningConfig), null) {
@Override
protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory) {
return true;
}
};
}
use of org.apache.druid.segment.indexing.RealtimeIOConfig in project druid by druid-io.
the class TaskSerdeTest method testRealtimeIndexTaskSerde.
@Test
public void testRealtimeIndexTaskSerde() throws Exception {
final RealtimeIndexTask task = new RealtimeIndexTask(null, new TaskResource("rofl", 2), new FireDepartment(new DataSchema("foo", null, new AggregatorFactory[0], new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), null, jsonMapper), new RealtimeIOConfig(new LocalFirehoseFactory(new File("lol"), "rofl", null), (schema, config, metrics) -> null), new RealtimeTuningConfig(null, 1, 10L, null, new Period("PT10M"), null, null, null, null, 1, NoneShardSpec.instance(), indexSpec, null, 0, 0, true, null, null, null, null)), null);
final String json = jsonMapper.writeValueAsString(task);
// Just want to run the clock a bit to make sure the task id doesn't change
Thread.sleep(100);
final RealtimeIndexTask task2 = (RealtimeIndexTask) jsonMapper.readValue(json, Task.class);
Assert.assertEquals("foo", task.getDataSource());
Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
Assert.assertEquals("rofl", task.getTaskResource().getAvailabilityGroup());
Assert.assertEquals(new Period("PT10M"), task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod());
Assert.assertEquals(Granularities.HOUR, task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity());
Assert.assertTrue(task.getRealtimeIngestionSchema().getTuningConfig().isReportParseExceptions());
Assert.assertEquals(task.getId(), task2.getId());
Assert.assertEquals(task.getGroupId(), task2.getGroupId());
Assert.assertEquals(task.getDataSource(), task2.getDataSource());
Assert.assertEquals(task.getTaskResource().getRequiredCapacity(), task2.getTaskResource().getRequiredCapacity());
Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup());
Assert.assertEquals(task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod(), task2.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod());
Assert.assertEquals(task.getRealtimeIngestionSchema().getTuningConfig().getMaxBytesInMemory(), task2.getRealtimeIngestionSchema().getTuningConfig().getMaxBytesInMemory());
Assert.assertEquals(task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity(), task2.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity());
}
Aggregations