use of com.couchbase.client.dcp.util.Version in project couchbase-elasticsearch-connector by couchbase.
the class ElasticsearchConnector method run.
/**
 * Runs the connector until a fatal error occurs or the thread is interrupted.
 * <p>
 * Startup sequence, in the order coded below: wait for Elasticsearch and check its version,
 * wait for the Couchbase bucket(s), build the checkpoint service and the Elasticsearch worker
 * group, connect the DCP client, initialize checkpoint and session state, optionally sleep for
 * the startup quiet period, schedule periodic checkpoint saves, open the DCP streams, and
 * finally start the HTTP server. The method then blocks in {@code workers.awaitFatalError()}.
 * <p>
 * Every code path out of this method ends in a throw; there is no normal return.
 * <p>
 * NOTE(review): the executor, cluster environment, cluster, worker group and DCP client are
 * created before the inner {@code try}, and are only released in that inner {@code finally}.
 * An exception thrown before the inner {@code try} is entered appears to skip that cleanup --
 * confirm whether this is intentional.
 *
 * @param config connector configuration (Couchbase, Elasticsearch, metrics, logging, group membership)
 * @param panicButton invoked when the initial DCP connection fails; also handed to the DCP event
 *     listener setup and given a pre-panic hook that unregisters the checkpoint shutdown hook
 * @param startupQuietPeriod how long to sleep after state initialization and before streaming
 *     starts; a zero duration skips the sleep
 * @throws InterruptedException rethrown, after saving checkpoints, when the inner block is interrupted
 * @throws Throwable the fatal error returned by the worker group, or any error raised during setup
 */
public static void run(ConnectorConfig config, PanicButton panicButton, Duration startupQuietPeriod) throws Throwable {
// Assigned only on the path where the worker group reports a fatal error; rethrown at the very end.
final Throwable fatalError;
final Membership membership = config.group().staticMembership();
LOGGER.info("Read configuration: {}", redactSystem(config));
final ScheduledExecutorService checkpointExecutor = Executors.newSingleThreadScheduledExecutor();
// The metric reporter, HTTP server and Elasticsearch client are closed automatically by try-with-resources.
try (Slf4jReporter metricReporter = newSlf4jReporter(config.metrics().logInterval());
HttpServer httpServer = new HttpServer(config.metrics().httpPort(), membership);
RestHighLevelClient esClient = newElasticsearchClient(config.elasticsearch(), config.trustStore())) {
// Document lifecycle events are logged at INFO when enabled in the config, otherwise at DEBUG.
DocumentLifecycle.setLogLevel(config.logging().logDocumentLifecycle() ? LogLevel.INFO : LogLevel.DEBUG);
LogRedaction.setRedactionLevel(config.logging().redactionLevel());
DcpHelper.setRedactionLevel(config.logging().redactionLevel());
final ClusterEnvironment env = CouchbaseHelper.environmentBuilder(config.couchbase(), config.trustStore()).build();
final Cluster cluster = CouchbaseHelper.createCluster(config.couchbase(), env);
// Blocks until Elasticsearch responds; 2.0.0 is passed as the required version and 5.6.16 as the recommended one.
final Version elasticsearchVersion = waitForElasticsearchAndRequireVersion(esClient, new Version(2, 0, 0), new Version(5, 6, 16));
LOGGER.info("Elasticsearch version {}", elasticsearchVersion);
validateConfig(elasticsearchVersion, config.elasticsearch());
// Wait for couchbase server to come online, then open the bucket.
final Bucket bucket = CouchbaseHelper.waitForBucket(cluster, config.couchbase().bucket());
final Set<SeedNode> kvNodes = CouchbaseHelper.getKvNodes(config.couchbase(), bucket);
// Reuse the already-open source bucket when the metadata bucket has the same name.
final boolean storeMetadataInSourceBucket = config.couchbase().metadataBucket().equals(config.couchbase().bucket());
final Bucket metadataBucket = storeMetadataInSourceBucket ? bucket : CouchbaseHelper.waitForBucket(cluster, config.couchbase().metadataBucket());
final Collection metadataCollection = CouchbaseHelper.getMetadataCollection(metadataBucket, config.couchbase());
final CheckpointDao checkpointDao = new CouchbaseCheckpointDao(metadataCollection, config.group().name());
// todo get this from dcp client
final String bucketUuid = "";
final CheckpointService checkpointService = new CheckpointService(bucketUuid, checkpointDao);
final RequestFactory requestFactory = new RequestFactory(config.elasticsearch().types(), config.elasticsearch().docStructure(), config.elasticsearch().rejectLog());
final ElasticsearchWorkerGroup workers = new ElasticsearchWorkerGroup(esClient, checkpointService, requestFactory, ErrorListener.NOOP, config.elasticsearch().bulkRequest());
Metrics.gauge("write.queue", "Document events currently buffered in memory.", workers, ElasticsearchWorkerGroup::getQueueSize);
// High value indicates the connector has stalled
Metrics.gauge("es.wait.ms", null, workers, ElasticsearchWorkerGroup::getCurrentRequestMillis);
// Same as "es.wait.ms" but normalized to seconds for Prometheus
Metrics.gauge("es.wait.seconds", "Duration of in-flight Elasticsearch bulk request (including any retries). Long duration may indicate connector has stalled.", workers, value -> value.getCurrentRequestMillis() / (double) SECONDS.toMillis(1));
final Client dcpClient = DcpHelper.newClient(config.group().name(), config.couchbase(), kvNodes, config.trustStore());
// DCP events are handed to the worker group via workers::submit.
initEventListener(dcpClient, panicButton, workers::submit);
// Never started directly in this method; it is registered below as a JVM shutdown hook.
final Thread saveCheckpoints = new Thread(checkpointService::save, "save-checkpoints");
try {
try {
dcpClient.connect().block(Duration.ofMillis(config.couchbase().dcp().connectTimeout().millis()));
} catch (Throwable t) {
// NOTE(review): execution continues past this catch if panic() returns -- presumably it terminates the process; confirm.
panicButton.panic("Failed to establish initial DCP connection within " + config.couchbase().dcp().connectTimeout(), t);
}
final int numPartitions = dcpClient.numPartitions();
LOGGER.info("Bucket has {} partitions. Membership = {}", numPartitions, membership);
// The subset of partitions this group member is responsible for.
final Set<Integer> partitions = membership.getPartitions(numPartitions);
if (partitions.isEmpty()) {
// need to do this check, because if we started streaming with an empty list, the DCP client would open streams for *all* partitions
throw new IllegalArgumentException("There are more workers than Couchbase vbuckets; this worker doesn't have any work to do.");
}
checkpointService.init(numPartitions, () -> DcpHelper.getCurrentSeqnosAsMap(dcpClient, partitions, Duration.ofSeconds(5)));
dcpClient.initializeState(StreamFrom.BEGINNING, StreamTo.INFINITY).block();
initSessionState(dcpClient, checkpointService, partitions);
// The quiet period deliberately comes after connecting and initializing state (above),
// so failures in those steps surface before any time is spent sleeping.
if (!startupQuietPeriod.isZero()) {
LOGGER.info("Entering startup quiet period; sleeping for {} so peers can terminate in case of unsafe scaling.", startupQuietPeriod);
MILLISECONDS.sleep(startupQuietPeriod.toMillis());
LOGGER.info("Startup quiet period complete.");
}
// Save checkpoints every 10 seconds, starting 10 seconds from now.
checkpointExecutor.scheduleWithFixedDelay(checkpointService::save, 10, 10, SECONDS);
// Save checkpoints on JVM shutdown as well.
RuntimeHelper.addShutdownHook(saveCheckpoints);
// Unless shutdown is due to panic...
panicButton.addPrePanicHook(() -> RuntimeHelper.removeShutdownHook(saveCheckpoints));
try {
LOGGER.debug("Opening DCP streams for partitions: {}", partitions);
dcpClient.startStreaming(partitions).block();
} catch (RuntimeException e) {
// Surface a wrapped InterruptedException as itself so the graceful-shutdown catch below can handle it.
ThrowableHelper.propagateCauseIfPossible(e, InterruptedException.class);
throw e;
}
// Start HTTP server *after* other setup is complete, so the metrics endpoint
// can be used as a "successful startup" probe.
httpServer.start();
// A negative configured port is treated as "metrics HTTP server disabled".
if (config.metrics().httpPort() >= 0) {
LOGGER.info("Prometheus metrics available at http://localhost:{}/metrics/prometheus", httpServer.getBoundPort());
LOGGER.info("Dropwizard metrics available at http://localhost:{}/metrics/dropwizard?pretty", httpServer.getBoundPort());
} else {
LOGGER.info("Metrics HTTP server is disabled. Edit the [metrics] 'httpPort' config property to enable.");
}
LOGGER.info("Elasticsearch connector startup complete.");
// Blocks here for the normal lifetime of the connector.
fatalError = workers.awaitFatalError();
LOGGER.error("Terminating due to fatal error from worker", fatalError);
} catch (InterruptedException shutdownRequest) {
LOGGER.info("Graceful shutdown requested. Saving checkpoints and cleaning up.");
checkpointService.save();
throw shutdownRequest;
} catch (Throwable t) {
LOGGER.error("Terminating due to fatal error during setup", t);
throw t;
} finally {
// If we get here it means there was a fatal exception, or the connector is running in distributed
// or test mode and a graceful shutdown was requested. Don't need the shutdown hook for any of those cases.
RuntimeHelper.removeShutdownHook(saveCheckpoints);
checkpointExecutor.shutdown();
metricReporter.stop();
dcpClient.disconnect().block();
// to avoid buffer leak, must close *after* dcp client stops feeding it events
workers.close();
// Give an in-progress checkpoint save up to 10 seconds to finish.
checkpointExecutor.awaitTermination(10, SECONDS);
cluster.disconnect();
// can't reuse, because connector config might have different SSL settings next time
env.shutdown();
}
}
// give stdout a chance to quiet down so the stack trace on stderr isn't interleaved with stdout.
MILLISECONDS.sleep(500);
throw fatalError;
}
use of com.couchbase.client.dcp.util.Version in project couchbase-elasticsearch-connector by couchbase.
the class CouchbaseOps method tryParseVersion.
/**
 * Attempts to extract a version from a free-form server version string.
 * <p>
 * Everything before the first digit is discarded before parsing, because the
 * input looks like "Couchbase Server 5.5.0-2036 (EE)" and the version parser
 * copes with trailing text but not with leading text.
 *
 * @param versionString raw version string reported by the server
 * @return the parsed version, or empty if the string contains no digit, the
 *     parsed major version is 0, or parsing fails (failure is logged at WARN)
 */
private static Optional<Version> tryParseVersion(final String versionString) {
  try {
    final int firstDigit = indexOfFirstDigit(versionString);
    if (firstDigit != -1) {
      final Version parsed = Version.parseVersion(versionString.substring(firstDigit));
      // Builds off the master branch might report 0.0.0; treat that as "unknown".
      if (parsed.major() != 0) {
        return Optional.of(parsed);
      }
    }
    return Optional.empty();
  } catch (Exception e) {
    log.warn("Failed to parse version string '{}'", versionString, e);
    return Optional.empty();
  }
}
use of com.couchbase.client.dcp.util.Version in project couchbase-elasticsearch-connector by couchbase.
the class ElasticsearchHelper method waitForElasticsearchAndRequireVersion.
/**
 * Blocks until Elasticsearch answers an info request, then enforces the version requirements.
 * <p>
 * Failures to obtain the version are retried with truncated exponential backoff (starting at
 * 1 second, capped at 1 minute). The version checks run outside the retry block, so an
 * unsupported version fails immediately. Previously the "version too old" exception was caught
 * by the retry handler, reported as a connection failure, and retried forever.
 *
 * @param esClient client used to query the Elasticsearch server info
 * @param required minimum acceptable version; anything lower is rejected
 * @param recommended versions lower than this (but not lower than {@code required}) only log a warning
 * @return the version reported by Elasticsearch
 * @throws RuntimeException if the reported version is lower than {@code required}
 * @throws InterruptedException if interrupted while sleeping between retries
 */
public static Version waitForElasticsearchAndRequireVersion(RestHighLevelClient esClient, Version required, Version recommended) throws InterruptedException {
  final Iterator<TimeValue> retryDelays = truncatedExponentialBackoff(TimeValue.timeValueSeconds(1), TimeValue.timeValueMinutes(1)).iterator();
  while (true) {
    final Version version;
    try {
      // Only the remote call (and conversion of its result) is retried.
      org.elasticsearch.Version esVersion = esClient.info().getVersion();
      version = new Version(esVersion.major, esVersion.minor, esVersion.revision);
    } catch (Exception e) {
      final TimeValue delay = retryDelays.next();
      LOGGER.warn("Failed to connect to Elasticsearch. Retrying in {}", delay, e);
      if (ThrowableHelper.hasCause(e, ConnectionClosedException.class)) {
        LOGGER.warn(" Troubleshooting tip: If the Elasticsearch connection failure persists," + " and if Elasticsearch is configured to require TLS/SSL, then make sure the connector is also configured to use secure connections.");
      }
      MILLISECONDS.sleep(delay.millis());
      continue;
    }

    // Deliberately outside the try block: an unsupported version is not a transient
    // connection problem and must propagate to the caller instead of being retried.
    if (version.compareTo(required) < 0) {
      throw new RuntimeException("Elasticsearch version " + required + " or later required; actual version is " + version);
    }
    if (version.compareTo(recommended) < 0) {
      LOGGER.warn("Elasticsearch version {} is lower than recommended version {}.", version, recommended);
    }
    return version;
  }
}
use of com.couchbase.client.dcp.util.Version in project couchbase-elasticsearch-connector by couchbase.
the class ElasticsearchConnector method main.
/**
 * Command-line entry point: loads the connector configuration, optionally patches the group
 * membership from the Kubernetes environment, and hands off to {@code run}.
 * <p>
 * Environment variables consulted:
 * <ul>
 * <li>{@code CBES_K8S_WATCH_REPLICAS} -- when "true", the group size is taken from
 *     {@code ReplicaChangeWatcher} and a startup quiet period is used.</li>
 * <li>{@code CBES_K8S_STATEFUL_SET} -- when "true" (or when replica watching is on), the
 *     member number is derived from the pod hostname ordinal.</li>
 * </ul>
 *
 * @param args command-line arguments, parsed by {@code OptionsParser}
 * @throws Throwable whatever {@code run} or the setup steps throw
 */
public static void main(String... args) throws Throwable {
  LOGGER.info("Couchbase Elasticsearch Connector version {}", getVersionString());

  final OptionsParser optionsParser = new OptionsParser();
  final File configFile = optionsParser.parse(args).valueOf(optionsParser.configFile);
  System.out.println("Reading connector configuration from " + configFile.getAbsoluteFile());
  ConnectorConfig config = ConnectorConfig.from(configFile);

  final PanicButton panicButton = new DefaultPanicButton();

  final boolean watchReplicas = "true".equals(System.getenv("CBES_K8S_WATCH_REPLICAS"));
  // Watching replicas implies the member number also comes from the hostname.
  final boolean memberNumberFromHostname = watchReplicas || "true".equals(System.getenv("CBES_K8S_STATEFUL_SET"));

  if (memberNumberFromHostname) {
    final int memberNumber = StatefulSetInfo.fromHostname().podOrdinal + 1;
    LOGGER.info("Getting group member number from Kubernetes pod hostname: {}", memberNumber);

    // Kludge: Membership validates its arguments, so something like "4 of 1" is rejected.
    // When the real group size will come from the StatefulSet, use the largest sane
    // value (1024) as a placeholder for now; it is replaced with the actual size below.
    final int provisionalSize;
    if (watchReplicas) {
      provisionalSize = 1024;
    } else {
      provisionalSize = config.group().staticMembership().getClusterSize();
    }
    config = transformMembership(config, ignored -> Membership.of(memberNumber, provisionalSize));
  }

  KubernetesClient k8sClient = null;
  try {
    if (watchReplicas) {
      k8sClient = new DefaultKubernetesClient();
      LOGGER.info("Activating native Kubernetes integration; connector will use StatefulSet spec" + " to determine group size." + " This mode requires a Kubernetes service account with 'get' and 'watch', and 'list'" + " permissions for the StatefulSet.");
      final int replicas = ReplicaChangeWatcher.getReplicasAndPanicOnChange(k8sClient, panicButton);
      config = transformMembership(config, current -> Membership.of(current.getMemberNumber(), replicas));
    }

    // memberNumberFromHostname is already true whenever watchReplicas is.
    if (memberNumberFromHostname) {
      LOGGER.info("Patched configuration with info from Kubernetes environment; membership = {}", config.group().staticMembership());
    }

    if (config.group().staticMembership().getClusterSize() > 1024) {
      panicButton.panic("Invalid group size configuration; totalMembers must be <= 1024." + " Did you forget to set the CBES_TOTAL_MEMBERS environment variable?");
    }

    final Duration quietPeriod;
    if (watchReplicas) {
      quietPeriod = ReplicaChangeWatcher.startupQuietPeriod();
    } else {
      quietPeriod = Duration.ZERO;
    }
    run(config, panicButton, quietPeriod);
  } finally {
    // Close the client so its threads don't prevent the app from exiting.
    if (k8sClient != null) {
      k8sClient.close();
    }
  }
}
Aggregations