Search in sources:

Example 1 with Connector

use of com.datastax.oss.dsbulk.connectors.api.Connector in project dsbulk by datastax.

the class ConnectorSettingsTest method should_find_csv_connector_short_name.

/**
 * Verifies that the connector short name {@code csv} yields a {@link CSVConnector} and that the
 * {@code csv} section of the merged configuration carries the expected setting paths.
 */
@Test
void should_find_csv_connector_short_name() {
    // User-supplied settings layered on top of the connector defaults.
    Config settings =
        ConfigFactory.parseString("name: csv, csv{url:\"file:///a/b.csv\"}")
            .withFallback(CONNECTOR_DEFAULT_SETTINGS);

    ConnectorSettings connectorSettings = new ConnectorSettings(settings, true);
    connectorSettings.init(true);

    // The short name must have been resolved to the CSV connector implementation.
    Connector resolved = connectorSettings.getConnector();
    assertThat(resolved)
        .isNotNull()
        .isInstanceOf(CSVConnector.class);

    // The connector section exposes its own settings and no nested "csv" entry.
    Config csvSection = settings.getConfig("csv");
    assertThat(csvSection)
        .hasPaths(
            "url",
            "fileNamePattern",
            "recursive",
            "maxConcurrentFiles",
            "encoding",
            "header",
            "delimiter",
            "quote",
            "escape",
            "comment",
            "skipRecords",
            "maxRecords")
        .doesNotHavePath("csv");
}
Also used : CSVConnector(com.datastax.oss.dsbulk.connectors.csv.CSVConnector) Connector(com.datastax.oss.dsbulk.connectors.api.Connector) JsonConnector(com.datastax.oss.dsbulk.connectors.json.JsonConnector) Config(com.typesafe.config.Config) Test(org.junit.jupiter.api.Test)

Example 2 with Connector

use of com.datastax.oss.dsbulk.connectors.api.Connector in project dsbulk by datastax.

the class PrometheusEndToEndSimulacronIT method newConnectorDelegate.

/**
 * Creates an anonymous, read-only {@link Connector} used as a delegate.
 *
 * <p>The connector reports a read concurrency of 1, supports only
 * {@link CommonConnectorFeature#INDEXED_RECORDS}, maps every field to {@link GenericType#STRING},
 * and rejects all write-related calls with {@link UnsupportedOperationException}.
 *
 * @return the new connector instance.
 */
@NonNull
private Connector newConnectorDelegate() {
    return new Connector() {

        @NonNull
        @Override
        public Publisher<Publisher<Record>> read() {
            AtomicInteger emitted = new AtomicInteger();
            AtomicBoolean active = new AtomicBoolean(true);
            // A single inner publisher that keeps generating records while the flag is set.
            Flux<Record> source = Flux.generate(sink -> {
                // startTimer is invoked exactly once, when the first item is requested;
                // how it uses the flag is defined outside this method.
                if (emitted.getAndIncrement() == 0) {
                    startTimer(active);
                }
                if (!active.get()) {
                    sink.complete();
                    return;
                }
                // NOTE(review): the suffix is the literal 1, not the counter value, so every
                // generated record carries the same values - confirm this is intended.
                Record next = RecordUtils.indexedCSV("pk", "pk" + 1, "cc", "cc" + 1, "v", "v" + 1);
                sink.next(next);
            });
            return Flux.just(source);
        }

        @Override
        public int readConcurrency() {
            return 1;
        }

        @Override
        public boolean supports(@NonNull ConnectorFeature feature) {
            return CommonConnectorFeature.INDEXED_RECORDS == feature;
        }

        @NonNull
        @Override
        public RecordMetadata getRecordMetadata() {
            // Every field is reported as a String, whatever the CQL type.
            return (fieldType, cqlType) -> GenericType.STRING;
        }

        // Writing is not supported by this delegate.

        @Override
        public int writeConcurrency() {
            throw new UnsupportedOperationException();
        }

        @NonNull
        @Override
        public Function<Publisher<Record>, Publisher<Record>> write() {
            throw new UnsupportedOperationException();
        }
    };
}
Also used : SimulacronUtils(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils) BoundCluster(com.datastax.oss.simulacron.server.BoundCluster) Connector(com.datastax.oss.dsbulk.connectors.api.Connector) DockerImageName(org.testcontainers.utility.DockerImageName) URL(java.net.URL) MockConnector(com.datastax.oss.dsbulk.runner.tests.MockConnector) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) GenericType(com.datastax.oss.driver.api.core.type.reflect.GenericType) Timer(java.util.Timer) Function(java.util.function.Function) Network(org.testcontainers.containers.Network) RecordUtils(com.datastax.oss.dsbulk.runner.tests.RecordUtils) RecordMetadata(com.datastax.oss.dsbulk.connectors.api.RecordMetadata) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BeforeAll(org.junit.jupiter.api.BeforeAll) Keyspace(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace) NonNull(edu.umd.cs.findbugs.annotations.NonNull) EndToEndUtils.assertStatus(com.datastax.oss.dsbulk.runner.tests.EndToEndUtils.assertStatus) Resources(com.datastax.oss.driver.shaded.guava.common.io.Resources) GenericContainer(org.testcontainers.containers.GenericContainer) Tag(org.junit.jupiter.api.Tag) TimerTask(java.util.TimerTask) Record(com.datastax.oss.dsbulk.connectors.api.Record) LogInterceptor(com.datastax.oss.dsbulk.tests.logging.LogInterceptor) Container(org.testcontainers.junit.jupiter.Container) Session(com.datastax.oss.driver.api.core.session.Session) CommonConnectorFeature(com.datastax.oss.dsbulk.connectors.api.CommonConnectorFeature) WorkflowUtils(com.datastax.oss.dsbulk.workflow.api.utils.WorkflowUtils) Table(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table) TestAssertions.assertThat(com.datastax.oss.dsbulk.tests.assertions.TestAssertions.assertThat) Publisher(org.reactivestreams.Publisher) Testcontainers(org.testcontainers.junit.jupiter.Testcontainers) IOException(java.io.IOException) 
ConnectorFeature(com.datastax.oss.dsbulk.connectors.api.ConnectorFeature) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) LogCapture(com.datastax.oss.dsbulk.tests.logging.LogCapture) StandardCharsets(java.nio.charset.StandardCharsets) TimeUnit(java.util.concurrent.TimeUnit) Test(org.junit.jupiter.api.Test) STATUS_OK(com.datastax.oss.dsbulk.runner.ExitStatus.STATUS_OK) Flux(reactor.core.publisher.Flux) ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) MountableFile(org.testcontainers.utility.MountableFile) Column(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column) TEXT(com.datastax.oss.driver.api.core.type.DataTypes.TEXT) Connector(com.datastax.oss.dsbulk.connectors.api.Connector) MockConnector(com.datastax.oss.dsbulk.runner.tests.MockConnector) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NonNull(edu.umd.cs.findbugs.annotations.NonNull) Record(com.datastax.oss.dsbulk.connectors.api.Record) Publisher(org.reactivestreams.Publisher) CommonConnectorFeature(com.datastax.oss.dsbulk.connectors.api.CommonConnectorFeature) ConnectorFeature(com.datastax.oss.dsbulk.connectors.api.ConnectorFeature) NonNull(edu.umd.cs.findbugs.annotations.NonNull)

Example 3 with Connector

use of com.datastax.oss.dsbulk.connectors.api.Connector in project dsbulk by datastax.

the class UnloadWorkflow method manyWriters.

/**
 * Assembles the read-map-write pipeline over all {@code readStatements}, running up to
 * {@code min(readConcurrency, writeConcurrency)} statements concurrently on a dedicated parallel
 * scheduler, which is also registered in {@code schedulers}.
 *
 * @return the flux of records produced once writing and failure handling have been applied.
 */
private Flux<Record> manyWriters() {
    // writeConcurrency and readConcurrency are >= 0.5C here
    int effectiveConcurrency = Math.min(readConcurrency, writeConcurrency);
    int threadCount = Math.min(numCores * 2, effectiveConcurrency);
    Scheduler workflowScheduler =
        Schedulers.newParallel(threadCount, new DefaultThreadFactory("workflow"));
    schedulers.add(workflowScheduler);
    return Flux.fromIterable(readStatements)
        .flatMap(
            statement -> {
                // Read the statement's results and map them to records, monitoring and
                // handling failures at each stage.
                Flux<Record> mapped =
                    Flux.from(executor.readReactive(statement))
                        .publishOn(workflowScheduler, 500)
                        .transform(queryWarningsHandler)
                        .transform(totalItemsMonitor)
                        .transform(totalItemsCounter)
                        .transform(failedReadResultsMonitor)
                        .transform(failedReadsHandler)
                        .map(readResultMapper::map)
                        .transform(failedRecordsMonitor)
                        .transform(unmappableRecordsHandler);
                final Flux<Record> written;
                if (effectiveConcurrency != writeConcurrency) {
                    // If the actual concurrency is lesser than the connector's desired write
                    // concurrency, we need to give the connector a chance to switch writers
                    // frequently so that it can really redirect records to all the final
                    // destinations (to that many files on disk for example). If the connector
                    // is correctly implemented, each window will be redirected to a different
                    // destination in a round-robin fashion.
                    written =
                        mapped
                            .window(500)
                            .flatMap(batch -> batch.transform(writer), 1, 500);
                } else {
                    // One writer invocation per statement is enough.
                    written = mapped.transform(writer);
                }
                // Second monitoring pass: applied again after the writer stage.
                return written
                    .transform(failedRecordsMonitor)
                    .transform(failedRecordsHandler);
            },
            effectiveConcurrency,
            500);
}
Also used : DefaultThreadFactory(io.netty.util.concurrent.DefaultThreadFactory) ReadResult(com.datastax.oss.dsbulk.executor.api.result.ReadResult) Connector(com.datastax.oss.dsbulk.connectors.api.Connector) DefaultThreadFactory(io.netty.util.concurrent.DefaultThreadFactory) BulkReader(com.datastax.oss.dsbulk.executor.api.reader.BulkReader) DriverSettings(com.datastax.oss.dsbulk.workflow.commons.settings.DriverSettings) LoggerFactory(org.slf4j.LoggerFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Workflow(com.datastax.oss.dsbulk.workflow.api.Workflow) Scheduler(reactor.core.scheduler.Scheduler) Function(java.util.function.Function) ExecutorSettings(com.datastax.oss.dsbulk.workflow.commons.settings.ExecutorSettings) SchemaSettings(com.datastax.oss.dsbulk.workflow.commons.settings.SchemaSettings) HashSet(java.util.HashSet) RecordMetadata(com.datastax.oss.dsbulk.connectors.api.RecordMetadata) CqlSession(com.datastax.oss.driver.api.core.CqlSession) ConnectorSettings(com.datastax.oss.dsbulk.workflow.commons.settings.ConnectorSettings) Duration(java.time.Duration) SchemaGenerationStrategy(com.datastax.oss.dsbulk.workflow.commons.settings.SchemaGenerationStrategy) Schedulers(reactor.core.scheduler.Schedulers) Record(com.datastax.oss.dsbulk.connectors.api.Record) Stopwatch(com.datastax.oss.driver.shaded.guava.common.base.Stopwatch) CommonConnectorFeature(com.datastax.oss.dsbulk.connectors.api.CommonConnectorFeature) Logger(org.slf4j.Logger) Config(com.typesafe.config.Config) LogSettings(com.datastax.oss.dsbulk.workflow.commons.settings.LogSettings) Publisher(org.reactivestreams.Publisher) ConvertingCodecFactory(com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory) SettingsManager(com.datastax.oss.dsbulk.workflow.commons.settings.SettingsManager) EngineSettings(com.datastax.oss.dsbulk.workflow.commons.settings.EngineSettings) Set(java.util.Set) ClusterInformationUtils(com.datastax.oss.dsbulk.workflow.commons.utils.ClusterInformationUtils) 
CodecSettings(com.datastax.oss.dsbulk.workflow.commons.settings.CodecSettings) MonitoringSettings(com.datastax.oss.dsbulk.workflow.commons.settings.MonitoringSettings) TimeUnit(java.util.concurrent.TimeUnit) Flux(reactor.core.publisher.Flux) List(java.util.List) CloseableUtils(com.datastax.oss.dsbulk.workflow.commons.utils.CloseableUtils) ReadResultMapper(com.datastax.oss.dsbulk.workflow.commons.schema.ReadResultMapper) DurationUtils(com.datastax.oss.dsbulk.workflow.api.utils.DurationUtils) MetricsManager(com.datastax.oss.dsbulk.workflow.commons.metrics.MetricsManager) Statement(com.datastax.oss.driver.api.core.cql.Statement) LogManager(com.datastax.oss.dsbulk.workflow.commons.log.LogManager) Scheduler(reactor.core.scheduler.Scheduler) Record(com.datastax.oss.dsbulk.connectors.api.Record)

Example 4 with Connector

use of com.datastax.oss.dsbulk.connectors.api.Connector in project dsbulk by datastax.

the class ConnectorSettingsTest method should_find_json_connector_short_name.

/**
 * Verifies that the connector short name {@code json} yields a {@link JsonConnector} and that
 * the {@code json} section of the merged configuration carries the expected setting paths.
 */
@Test
void should_find_json_connector_short_name() {
    // User-supplied settings layered on top of the connector defaults.
    Config settings =
        ConfigFactory.parseString("name: json, json{ url:\"file:///a/b.json\"}")
            .withFallback(CONNECTOR_DEFAULT_SETTINGS);

    ConnectorSettings connectorSettings = new ConnectorSettings(settings, true);
    connectorSettings.init(true);

    // The short name must have been resolved to the JSON connector implementation.
    Connector resolved = connectorSettings.getConnector();
    assertThat(resolved)
        .isNotNull()
        .isInstanceOf(JsonConnector.class);

    // The connector section exposes its own settings and no nested "json" entry.
    Config jsonSection = settings.getConfig("json");
    assertThat(jsonSection)
        .hasPaths(
            "url",
            "fileNamePattern",
            "recursive",
            "maxConcurrentFiles",
            "encoding",
            "skipRecords",
            "maxRecords",
            "parserFeatures",
            "generatorFeatures",
            "prettyPrint")
        .doesNotHavePath("json");
}
Also used : CSVConnector(com.datastax.oss.dsbulk.connectors.csv.CSVConnector) Connector(com.datastax.oss.dsbulk.connectors.api.Connector) JsonConnector(com.datastax.oss.dsbulk.connectors.json.JsonConnector) Config(com.typesafe.config.Config) Test(org.junit.jupiter.api.Test)

Example 5 with Connector

use of com.datastax.oss.dsbulk.connectors.api.Connector in project dsbulk by datastax.

the class MockConnector method mockWrites.

/**
 * Sets up the mock connector to emulate writes; it will store all received records as if they
 * were written to an external sink. The "written" records will appear in the returned list.
 *
 * <p>The installed delegate is write-only: it reports a read concurrency of -1 and its
 * {@code read()} publisher is empty. Its write concurrency is 1, it claims support for every
 * connector feature, and it maps every field to {@link GenericType#STRING}.
 *
 * @return the list where "written" records will be stored.
 */
public static List<Record> mockWrites() {
    List<Record> records = new ArrayList<>();
    setDelegate(new Connector() {

        @Override
        public void init() {
            // nothing to initialize
        }

        @Override
        public void configure(@NonNull Config settings, boolean read, boolean retainRecordSources) {
            // nothing to configure
        }

        @Override
        public int readConcurrency() {
            return -1;
        }

        @Override
        public int writeConcurrency() {
            return 1;
        }

        @Override
        public boolean supports(@NonNull ConnectorFeature feature) {
            return true;
        }

        @NonNull
        @Override
        public RecordMetadata getRecordMetadata() {
            return (field, cql) -> GenericType.STRING;
        }

        @NonNull
        @Override
        public Publisher<Publisher<Record>> read() {
            // Previously this returned the method reference Flux::just coerced to a Publisher:
            // its subscribe() built a Flux and discarded it, never calling onSubscribe, which
            // breaks the Reactive Streams contract and leaves any subscriber hanging. An empty
            // Flux is a compliant publisher that simply completes without emitting anything.
            return Flux.empty();
        }

        @NonNull
        @Override
        public Function<Publisher<Record>, Publisher<Record>> write() {
            // Pass records through unchanged while capturing each one in the returned list.
            return upstream -> Flux.from(upstream).doOnNext(records::add);
        }
    });
    return records;
}
Also used : Connector(com.datastax.oss.dsbulk.connectors.api.Connector) Config(com.typesafe.config.Config) ArrayList(java.util.ArrayList) Flux(reactor.core.publisher.Flux) Publisher(org.reactivestreams.Publisher) ConnectorFeature(com.datastax.oss.dsbulk.connectors.api.ConnectorFeature) RecordMetadata(com.datastax.oss.dsbulk.connectors.api.RecordMetadata) Function(java.util.function.Function) NonNull(edu.umd.cs.findbugs.annotations.NonNull) Record(com.datastax.oss.dsbulk.connectors.api.Record)

Aggregations

Connector (com.datastax.oss.dsbulk.connectors.api.Connector)6 Config (com.typesafe.config.Config)5 Record (com.datastax.oss.dsbulk.connectors.api.Record)4 RecordMetadata (com.datastax.oss.dsbulk.connectors.api.RecordMetadata)4 Function (java.util.function.Function)4 Publisher (org.reactivestreams.Publisher)4 Flux (reactor.core.publisher.Flux)4 ConnectorFeature (com.datastax.oss.dsbulk.connectors.api.ConnectorFeature)3 Test (org.junit.jupiter.api.Test)3 GenericType (com.datastax.oss.driver.api.core.type.reflect.GenericType)2 CommonConnectorFeature (com.datastax.oss.dsbulk.connectors.api.CommonConnectorFeature)2 CSVConnector (com.datastax.oss.dsbulk.connectors.csv.CSVConnector)2 JsonConnector (com.datastax.oss.dsbulk.connectors.json.JsonConnector)2 NonNull (edu.umd.cs.findbugs.annotations.NonNull)2 ArrayList (java.util.ArrayList)2 TimeUnit (java.util.concurrent.TimeUnit)2 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)2 CqlSession (com.datastax.oss.driver.api.core.CqlSession)1 Statement (com.datastax.oss.driver.api.core.cql.Statement)1 Session (com.datastax.oss.driver.api.core.session.Session)1