use of org.apache.druid.discovery.DiscoveryDruidNode in project druid by druid-io.
the class K8sAnnouncerAndDiscoveryIntTest method testAnnouncementAndDiscoveryWorkflow.
@Test(timeout = 30000L)
public void testAnnouncementAndDiscoveryWorkflow() throws Exception {
K8sApiClient k8sApiClient = new DefaultK8sApiClient(Config.defaultClient(), new DefaultObjectMapper());
K8sDruidNodeDiscoveryProvider discoveryProvider = new K8sDruidNodeDiscoveryProvider(podInfo, discoveryConfig, k8sApiClient);
discoveryProvider.start();
BooleanSupplier nodeInquirer = discoveryProvider.getForNode(testNode.getDruidNode(), NodeRole.ROUTER);
Assert.assertFalse(nodeInquirer.getAsBoolean());
DruidNodeDiscovery discovery = discoveryProvider.getForNodeRole(NodeRole.ROUTER);
CountDownLatch nodeViewInitialized = new CountDownLatch(1);
CountDownLatch nodeAppeared = new CountDownLatch(1);
CountDownLatch nodeDisappeared = new CountDownLatch(1);
discovery.registerListener(new DruidNodeDiscovery.Listener() {
@Override
public void nodesAdded(Collection<DiscoveryDruidNode> nodes) {
Iterator<DiscoveryDruidNode> iter = nodes.iterator();
if (iter.hasNext() && testNode.getDruidNode().getHostAndPort().equals(iter.next().getDruidNode().getHostAndPort())) {
nodeAppeared.countDown();
}
}
@Override
public void nodesRemoved(Collection<DiscoveryDruidNode> nodes) {
Iterator<DiscoveryDruidNode> iter = nodes.iterator();
if (iter.hasNext() && testNode.getDruidNode().getHostAndPort().equals(iter.next().getDruidNode().getHostAndPort())) {
nodeDisappeared.countDown();
}
}
@Override
public void nodeViewInitialized() {
nodeViewInitialized.countDown();
}
});
nodeViewInitialized.await();
K8sDruidNodeAnnouncer announcer = new K8sDruidNodeAnnouncer(podInfo, discoveryConfig, k8sApiClient, jsonMapper);
announcer.announce(testNode);
nodeAppeared.await();
Assert.assertTrue(nodeInquirer.getAsBoolean());
announcer.unannounce(testNode);
nodeDisappeared.await();
Assert.assertFalse(nodeInquirer.getAsBoolean());
discoveryProvider.stop();
}
use of org.apache.druid.discovery.DiscoveryDruidNode in project druid by druid-io.
the class DefaultK8sApiClient method listPods.
@Override
public DiscoveryDruidNodeList listPods(String podNamespace, String labelSelector, NodeRole nodeRole) {
try {
V1PodList podList = coreV1Api.listNamespacedPod(podNamespace, null, null, null, null, labelSelector, 0, null, null, null);
Preconditions.checkState(podList != null, "WTH: NULL podList");
Map<String, DiscoveryDruidNode> allNodes = new HashMap();
for (V1Pod podDef : podList.getItems()) {
DiscoveryDruidNode node = getDiscoveryDruidNodeFromPodDef(nodeRole, podDef);
allNodes.put(node.getDruidNode().getHostAndPortToUse(), node);
}
return new DiscoveryDruidNodeList(podList.getMetadata().getResourceVersion(), allNodes);
} catch (ApiException ex) {
throw new RE(ex, "Expection in listing pods, code[%d] and error[%s].", ex.getCode(), ex.getResponseBody());
}
}
use of org.apache.druid.discovery.DiscoveryDruidNode in project druid by druid-io.
the class ITHighAvailabilityTest method testDiscoveryAndSelfDiscovery.
@Test
public void testDiscoveryAndSelfDiscovery() {
ITRetryUtil.retryUntil(() -> {
try {
List<DruidNodeDiscovery> disco = ImmutableList.of(druidNodeDiscovery.getForNodeRole(NodeRole.COORDINATOR), druidNodeDiscovery.getForNodeRole(NodeRole.OVERLORD), druidNodeDiscovery.getForNodeRole(NodeRole.HISTORICAL), druidNodeDiscovery.getForNodeRole(NodeRole.MIDDLE_MANAGER), druidNodeDiscovery.getForNodeRole(NodeRole.INDEXER), druidNodeDiscovery.getForNodeRole(NodeRole.BROKER), druidNodeDiscovery.getForNodeRole(NodeRole.ROUTER));
int servicesDiscovered = 0;
for (DruidNodeDiscovery nodeRole : disco) {
Collection<DiscoveryDruidNode> nodes = nodeRole.getAllNodes();
servicesDiscovered += testSelfDiscovery(nodes);
}
return servicesDiscovered > 5;
} catch (Throwable t) {
return false;
}
}, true, RETRY_DELAY, NUM_RETRIES, "Standard services discovered");
}
use of org.apache.druid.discovery.DiscoveryDruidNode in project druid by druid-io.
the class AppenderatorDriverRealtimeIndexTask method run.
@Override
public TaskStatus run(final TaskToolbox toolbox) {
runThread = Thread.currentThread();
authorizerMapper = toolbox.getAuthorizerMapper();
rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, spec.getTuningConfig().isLogParseExceptions(), spec.getTuningConfig().getMaxParseExceptions(), spec.getTuningConfig().getMaxSavedParseExceptions());
setupTimeoutAlert();
DataSchema dataSchema = spec.getDataSchema();
RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
this.metrics = fireDepartmentForMetrics.getMetrics();
final Supplier<Committer> committerSupplier = Committers.nilSupplier();
DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
try {
log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
toolbox.getChatHandlerProvider().register(getId(), this, false);
if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
toolbox.getDataSegmentServerAnnouncer().announce();
toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
}
driver.startJob(segmentId -> {
try {
if (lockGranularity == LockGranularity.SEGMENT) {
return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
} else {
final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
if (lock == null) {
return false;
}
if (lock.isRevoked()) {
throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
}
return true;
}
} catch (IOException e) {
throw new RuntimeException(e);
}
});
// Set up metrics emission
toolbox.addMonitor(metricsMonitor);
// Delay firehose connection to avoid claiming input resources while the plumber is starting up.
final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
int sequenceNumber = 0;
String sequenceName = makeSequenceName(getId(), sequenceNumber);
final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
throw new ISE("Stream ingestion task unexpectedly attempted to overwrite segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments));
}
if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
throw new ISE("Stream ingestion task unexpectedly attempted to drop segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments));
}
final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
return toolbox.getTaskActionClient().submit(action);
};
// Skip connecting firehose if we've been stopped before we got started.
synchronized (this) {
if (!gracefullyStopped) {
firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
}
}
ingestionState = IngestionState.BUILD_SEGMENTS;
// Time to read data!
while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
try {
InputRow inputRow = firehose.nextRow();
if (inputRow == null) {
log.debug("Discarded null row, considering thrownAway.");
rowIngestionMeters.incrementThrownAway();
} else {
AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
if (addResult.isOk()) {
final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
if (isPushRequired) {
publishSegments(driver, publisher, committerSupplier, sequenceName);
sequenceNumber++;
sequenceName = makeSequenceName(getId(), sequenceNumber);
}
} else {
// If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
}
}
} catch (ParseException e) {
handleParseException(e);
}
}
ingestionState = IngestionState.COMPLETED;
if (!gracefullyStopped) {
synchronized (this) {
if (gracefullyStopped) {
// Someone called stopGracefully after we checked the flag. That's okay, just stop now.
log.info("Gracefully stopping.");
} else {
finishingJob = true;
}
}
if (finishingJob) {
log.info("Finishing job...");
// Publish any remaining segments
publishSegments(driver, publisher, committerSupplier, sequenceName);
waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
}
} else if (firehose != null) {
log.info("Task was gracefully stopped, will persist data before exiting");
persistAndWait(driver, committerSupplier.get());
}
} catch (Throwable e) {
log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
errorMsg = Throwables.getStackTraceAsString(e);
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.failure(getId(), errorMsg);
} finally {
toolbox.getChatHandlerProvider().unregister(getId());
CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
appenderator.close();
CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn("Failed to close AppenderatorDriver"));
toolbox.removeMonitor(metricsMonitor);
if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
toolbox.getDataSegmentServerAnnouncer().unannounce();
toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
}
}
log.info("Job done!");
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.success(getId());
}
use of org.apache.druid.discovery.DiscoveryDruidNode in project druid by druid-io.
the class HttpRemoteTaskRunnerTest method testOneStuckTaskAssignmentDoesntBlockOthers.
/*
Simulates one task not getting acknowledged to be running after assigning it to a worker. But, other tasks are
successfully assigned to other worker and get completed.
*/
@Test(timeout = 60_000L)
public void testOneStuckTaskAssignmentDoesntBlockOthers() throws Exception {
TestDruidNodeDiscovery druidNodeDiscovery = new TestDruidNodeDiscovery();
DruidNodeDiscoveryProvider druidNodeDiscoveryProvider = EasyMock.createMock(DruidNodeDiscoveryProvider.class);
EasyMock.expect(druidNodeDiscoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY)).andReturn(druidNodeDiscovery);
EasyMock.replay(druidNodeDiscoveryProvider);
Task task1 = NoopTask.create("task-id-1", 0);
Task task2 = NoopTask.create("task-id-2", 0);
Task task3 = NoopTask.create("task-id-3", 0);
HttpRemoteTaskRunner taskRunner = new HttpRemoteTaskRunner(TestHelper.makeJsonMapper(), new HttpRemoteTaskRunnerConfig() {
@Override
public int getPendingTasksRunnerNumThreads() {
return 3;
}
}, EasyMock.createNiceMock(HttpClient.class), DSuppliers.of(new AtomicReference<>(DefaultWorkerBehaviorConfig.defaultConfig())), new NoopProvisioningStrategy<>(), druidNodeDiscoveryProvider, EasyMock.createNiceMock(TaskStorage.class), EasyMock.createNiceMock(CuratorFramework.class), new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null)) {
@Override
protected WorkerHolder createWorkerHolder(ObjectMapper smileMapper, HttpClient httpClient, HttpRemoteTaskRunnerConfig config, ScheduledExecutorService workersSyncExec, WorkerHolder.Listener listener, Worker worker, List<TaskAnnouncement> knownAnnouncements) {
return HttpRemoteTaskRunnerTest.createWorkerHolder(smileMapper, httpClient, config, workersSyncExec, listener, worker, ImmutableList.of(), ImmutableList.of(), // no announcements would be received for task1
ImmutableMap.of(task1, ImmutableList.of()), new AtomicInteger(), ImmutableSet.of());
}
};
taskRunner.start();
DiscoveryDruidNode druidNode1 = new DiscoveryDruidNode(new DruidNode("service", "host1", false, 8080, null, true, false), NodeRole.MIDDLE_MANAGER, ImmutableMap.of(WorkerNodeService.DISCOVERY_SERVICE_KEY, new WorkerNodeService("ip1", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
DiscoveryDruidNode druidNode2 = new DiscoveryDruidNode(new DruidNode("service", "host2", false, 8080, null, true, false), NodeRole.MIDDLE_MANAGER, ImmutableMap.of(WorkerNodeService.DISCOVERY_SERVICE_KEY, new WorkerNodeService("ip2", 2, "0", WorkerConfig.DEFAULT_CATEGORY)));
druidNodeDiscovery.getListeners().get(0).nodesAdded(ImmutableList.of(druidNode1, druidNode2));
taskRunner.run(task1);
Future<TaskStatus> future2 = taskRunner.run(task2);
Future<TaskStatus> future3 = taskRunner.run(task3);
Assert.assertTrue(future2.get().isSuccess());
Assert.assertTrue(future3.get().isSuccess());
Assert.assertEquals(task1.getId(), Iterables.getOnlyElement(taskRunner.getPendingTasks()).getTaskId());
}
Aggregations