Search in sources:

Example 1 with Keyspace

Example of using com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace in the dsbulk project by DataStax.

From the class PrometheusEndToEndSimulacronIT, method should_exchange_metrics_by_pull_and_push:

/**
 * Verifies that DSBulk metrics can be exchanged with Prometheus both ways:
 * pulled by Prometheus directly from DSBulk's embedded HTTP endpoint, and
 * pushed by DSBulk to a Prometheus Pushgateway that Prometheus then scrapes.
 * Also verifies that driver node-level metrics appear on the pull endpoint
 * only and are never forwarded through the gateway.
 *
 * @throws IOException if querying the Prometheus/gateway HTTP endpoints fails
 * @throws InterruptedException if the scrape-wait sleep is interrupted
 */
@Test
void should_exchange_metrics_by_pull_and_push() throws IOException, InterruptedException {
    // Use the mock connector so the load runs without real input files.
    MockConnector.setDelegate(newConnectorDelegate());
    // Prime the simulacron cluster with the ks1.table1 schema targeted by the load.
    SimulacronUtils.primeTables(simulacron, new Keyspace("ks1", new Table("table1", new Column("pk", TEXT), new Column("cc", TEXT), new Column("v", TEXT))));
    // Enable both pull and push monitoring; extra label foo=bar and job name job1
    // are asserted on below. The push URL targets the gateway container's mapped port.
    String[] args = { "load", "-c", "mock", "--engine.executionId", "LOAD1", "--schema.keyspace", "ks1", "--schema.table", "table1", "--monitoring.console", "false", "--monitoring.prometheus.push.enabled", "true", "--monitoring.prometheus.pull.enabled", "true", "--monitoring.prometheus.labels", "{ foo = bar }", "--monitoring.prometheus.job", "job1", "--monitoring.prometheus.push.groupBy.instance", "true", "--monitoring.prometheus.push.url", "http://" + gateway.getHost() + ":" + gateway.getMappedPort(9091), "--monitoring.trackBytes", "true", "--driver.advanced.metrics.node.enabled", "pool.open-connections" };
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    // The embedded pull endpoint must have been started on the default port 8080.
    assertThat(logs).hasMessageContaining("Prometheus Metrics HTTP server listening on 0.0.0.0:8080");
    // Give some time for Prometheus to scrape the gateway
    // NOTE(review): a fixed 7s sleep is a flakiness risk on slow CI hosts;
    // consider polling the query endpoint until data appears — TODO confirm.
    Thread.sleep(7_000);
    // assert that metrics were pulled directly from DSBulk
    // (instance label host.testcontainers.internal:8080 identifies the direct scrape).
    URL prometheusPullQuery = new URL("http", "localhost", prometheus.getMappedPort(9090), "/api/v1/query?query=dsbulk_records_total" + "{instance=\"host.testcontainers.internal:8080\"}[5m]");
    assertThat(Resources.toString(prometheusPullQuery, StandardCharsets.UTF_8)).contains("\"status\":\"success\"").contains("dsbulk_records_total").contains("\"application_name\":\"DataStax Bulk Loader LOAD1\"").contains("\"application_version\":\"" + WorkflowUtils.getBulkLoaderVersion() + "\"").contains("\"client_id\":\"fc93e4ac-7fa5-394f-814e-21b735d04c10\"").contains("\"driver_version\":\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\"").contains("\"instance\":\"host.testcontainers.internal:8080\"").contains("\"job\":\"dsbulk\"").contains("\"exported_job\":\"job1\"").contains("\"foo\":\"bar\"").contains("\"operation_id\":\"LOAD1\"");
    // Driver node metrics must also be available on the direct pull endpoint.
    URL prometheusDriverPullQuery = new URL("http", "localhost", prometheus.getMappedPort(9090), "/api/v1/query?query=dsbulk_driver_nodes_pool_open_connections" + "{instance=\"host.testcontainers.internal:8080\"}[5m]");
    assertThat(Resources.toString(prometheusDriverPullQuery, StandardCharsets.UTF_8)).contains("\"status\":\"success\"").contains("dsbulk_driver_nodes_pool_open_connections").contains("\"node\":\"" + hostname + ":" + port + "\"").contains("\"application_name\":\"DataStax Bulk Loader LOAD1\"").contains("\"application_version\":\"" + WorkflowUtils.getBulkLoaderVersion() + "\"").contains("\"client_id\":\"fc93e4ac-7fa5-394f-814e-21b735d04c10\"").contains("\"driver_version\":\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\"").contains("\"instance\":\"host.testcontainers.internal:8080\"").contains("\"job\":\"dsbulk\"").contains("\"exported_job\":\"job1\"").contains("\"foo\":\"bar\"").contains("\"operation_id\":\"LOAD1\"");
    // assert that metrics were pushed to the gateway
    URL gatewayQuery = new URL("http", "localhost", gateway.getMappedPort(9091), "/metrics");
    // Regex for the pushed sample's label set in Prometheus text exposition format.
    String labelsAndValue = "\\{" + "application_name=\"DataStax Bulk Loader LOAD1\"," + "application_version=\"" + WorkflowUtils.getBulkLoaderVersion() + "\"," + "client_id=\"fc93e4ac-7fa5-394f-814e-21b735d04c10\"," + "driver_version=\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\"," + "foo=\"bar\"," + "instance=\".+\"," + "job=\"job1\"," + "operation_id=\"LOAD1\"} .+";
    // Driver node metrics must NOT have been pushed to the gateway.
    assertThat(Resources.readLines(gatewayQuery, StandardCharsets.UTF_8)).anySatisfy(line -> assertThat(line).matches("dsbulk_success" + labelsAndValue)).anySatisfy(line -> assertThat(line).matches("dsbulk_last_success" + labelsAndValue)).anySatisfy(line -> assertThat(line).matches("dsbulk_records_total" + labelsAndValue)).allSatisfy(line -> assertThat(line).doesNotContain("dsbulk_driver_nodes_pool_open_connections"));
    // assert that Prometheus scraped DSBulk metrics from the gateway
    // (instance label gateway:9091 identifies samples that went through the gateway).
    URL prometheusPullGatewayQuery = new URL("http", "localhost", prometheus.getMappedPort(9090), "/api/v1/query?query=dsbulk_records_total{instance=\"gateway:9091\"}[5m]");
    assertThat(Resources.toString(prometheusPullGatewayQuery, StandardCharsets.UTF_8)).contains("\"status\":\"success\"").contains("dsbulk_records_total").contains("\"application_name\":\"DataStax Bulk Loader LOAD1\"").contains("\"application_version\":\"" + WorkflowUtils.getBulkLoaderVersion() + "\"").contains("\"client_id\":\"fc93e4ac-7fa5-394f-814e-21b735d04c10\"").contains("\"driver_version\":\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\"").contains("\"instance\":\"gateway:9091\"").contains("\"job\":\"gateway\"").contains("\"exported_job\":\"job1\"").contains("\"foo\":\"bar\"").contains("\"operation_id\":\"LOAD1\"");
    // assert that driver metrics did not arrive to Prometheus through the gateway
    // (an empty matrix result means no such series exists with the gateway instance label).
    URL prometheusDriverPullGatewayQuery = new URL("http", "localhost", prometheus.getMappedPort(9090), "/api/v1/query?query=dsbulk_driver_nodes_pool_open_connections{instance=\"gateway:9091\"}[5m]");
    assertThat(Resources.toString(prometheusDriverPullGatewayQuery, StandardCharsets.UTF_8)).isEqualTo("{\"status\":\"success\",\"data\":{\"resultType\":\"matrix\",\"result\":[]}}");
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) SimulacronUtils(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils) BoundCluster(com.datastax.oss.simulacron.server.BoundCluster) Connector(com.datastax.oss.dsbulk.connectors.api.Connector) DockerImageName(org.testcontainers.utility.DockerImageName) URL(java.net.URL) MockConnector(com.datastax.oss.dsbulk.runner.tests.MockConnector) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) GenericType(com.datastax.oss.driver.api.core.type.reflect.GenericType) Timer(java.util.Timer) Function(java.util.function.Function) Network(org.testcontainers.containers.Network) RecordUtils(com.datastax.oss.dsbulk.runner.tests.RecordUtils) RecordMetadata(com.datastax.oss.dsbulk.connectors.api.RecordMetadata) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) BeforeAll(org.junit.jupiter.api.BeforeAll) Keyspace(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace) NonNull(edu.umd.cs.findbugs.annotations.NonNull) EndToEndUtils.assertStatus(com.datastax.oss.dsbulk.runner.tests.EndToEndUtils.assertStatus) Resources(com.datastax.oss.driver.shaded.guava.common.io.Resources) GenericContainer(org.testcontainers.containers.GenericContainer) Tag(org.junit.jupiter.api.Tag) TimerTask(java.util.TimerTask) Record(com.datastax.oss.dsbulk.connectors.api.Record) LogInterceptor(com.datastax.oss.dsbulk.tests.logging.LogInterceptor) Container(org.testcontainers.junit.jupiter.Container) Session(com.datastax.oss.driver.api.core.session.Session) CommonConnectorFeature(com.datastax.oss.dsbulk.connectors.api.CommonConnectorFeature) WorkflowUtils(com.datastax.oss.dsbulk.workflow.api.utils.WorkflowUtils) Table(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table) TestAssertions.assertThat(com.datastax.oss.dsbulk.tests.assertions.TestAssertions.assertThat) Publisher(org.reactivestreams.Publisher) Testcontainers(org.testcontainers.junit.jupiter.Testcontainers) IOException(java.io.IOException) 
ConnectorFeature(com.datastax.oss.dsbulk.connectors.api.ConnectorFeature) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) LogCapture(com.datastax.oss.dsbulk.tests.logging.LogCapture) StandardCharsets(java.nio.charset.StandardCharsets) TimeUnit(java.util.concurrent.TimeUnit) Test(org.junit.jupiter.api.Test) STATUS_OK(com.datastax.oss.dsbulk.runner.ExitStatus.STATUS_OK) Flux(reactor.core.publisher.Flux) ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) MountableFile(org.testcontainers.utility.MountableFile) Column(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column) TEXT(com.datastax.oss.driver.api.core.type.DataTypes.TEXT) Table(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table) Column(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column) Keyspace(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) URL(java.net.URL) Test(org.junit.jupiter.api.Test)

Example 2 with Keyspace

Example of using com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace in the dsbulk project by DataStax.

From the class CSVEndToEndSimulacronIT, method error_load_missing_field:

/**
 * Verifies that a load with {@code --schema.allowMissingFields false} aborts once the
 * error cap is exceeded when the CSV input lacks a mapped field (column D), and that
 * all three input records are reported as failed in the logs and the mapping-errors log.
 */
@Test
void error_load_missing_field() throws Exception {
    // Prime a four-column target table so the mapping expects fields A..D.
    SimulacronUtils.primeTables(simulacron, new Keyspace("ks1", new Table("table1", new Column("a", INT), new Column("b", TEXT), new Column("c", BOOLEAN), new Column("d", INT))));
    String csvUrl = StringUtils.quoteJson(getClass().getResource("/missing-extra.csv"));
    String[] loadArgs = {
        "load",
        "--log.maxErrors", "2",
        "--log.verbosity", "2",
        "-header", "true",
        "--connector.csv.url", csvUrl,
        "--schema.query", "INSERT INTO ks1.table1 (a, b, c, d) VALUES (:a, :b, :c, :d)",
        "--schema.mapping", "A = a, B = b, C = c, D = d",
        "--schema.allowMissingFields", "false"
    };
    ExitStatus exitStatus = new DataStaxBulkLoader(addCommonSettings(loadArgs)).run();
    assertStatus(exitStatus, STATUS_ABORTED_TOO_MANY_ERRORS);
    String allLogMessages = logs.getAllMessagesAsString();
    assertThat(allLogMessages)
        .contains("aborted: Too many errors, the maximum allowed is 2")
        .contains("Records: total: 3, successful: 0, failed: 3");
    validateNumberOfBadRecords(3);
    validateExceptionsLog(3, "Required field D (mapped to column d) was missing from record", "mapping-errors.log");
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) EndToEndUtils.primeIpByCountryTable(com.datastax.oss.dsbulk.runner.tests.EndToEndUtils.primeIpByCountryTable) Table(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table) Column(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column) Keyspace(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 3 with Keyspace

Example of using com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace in the dsbulk project by DataStax.

From the class CSVEndToEndSimulacronIT, method error_load_extra_field:

/**
 * Verifies that a load with {@code --schema.allowExtraFields false} aborts once the
 * error cap is exceeded when the CSV input carries an unmapped field (column C), and
 * that all three input records are reported as failed.
 */
@Test
void error_load_extra_field() throws Exception {
    // Prime a two-column target table; the input file also carries a C column.
    SimulacronUtils.primeTables(simulacron, new Keyspace("ks1", new Table("table1", new Column("a", INT), new Column("b", TEXT))));
    String csvUrl = StringUtils.quoteJson(getClass().getResource("/missing-extra.csv"));
    String[] loadArgs = {
        "load",
        "--log.maxErrors", "2",
        "--log.verbosity", "2",
        "-header", "true",
        "--connector.csv.url", csvUrl,
        "--schema.query", "INSERT INTO ks1.table1 (a, b) VALUES (:a, :b)",
        "--schema.mapping", "A = a, B = b",
        "--schema.allowExtraFields", "false"
    };
    ExitStatus exitStatus = new DataStaxBulkLoader(addCommonSettings(loadArgs)).run();
    assertStatus(exitStatus, STATUS_ABORTED_TOO_MANY_ERRORS);
    String allLogMessages = logs.getAllMessagesAsString();
    assertThat(allLogMessages)
        .contains("aborted: Too many errors, the maximum allowed is 2")
        .contains("Records: total: 3, successful: 0, failed: 3");
    validateNumberOfBadRecords(3);
    validateExceptionsLog(3, "Extraneous field C was found in record", "mapping-errors.log");
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) EndToEndUtils.primeIpByCountryTable(com.datastax.oss.dsbulk.runner.tests.EndToEndUtils.primeIpByCountryTable) Table(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table) Column(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column) Keyspace(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 4 with Keyspace

Example of using com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace in the dsbulk project by DataStax.

From the class CSVEndToEndSimulacronIT, method unload_binary:

/**
 * Verifies that a blob column is unloaded using the requested binary codec:
 * BASE64 renders 0xcafebabe as {@code yv66vg==}, HEX renders it verbatim.
 *
 * @param format   value passed to {@code --codec.binary} (BASE64 or HEX)
 * @param expected the single CSV line expected in the unload directory
 */
@ParameterizedTest
@CsvSource({ "BASE64,1|yv66vg==", "HEX,1|0xcafebabe" })
void unload_binary(String format, String expected) throws Exception {
    // One partition key column (pk), no clustering columns, one blob value column (v),
    // pre-populated with the single row {pk=1, v=0xcafebabe}.
    Table primedTable = new Table("table1", Collections.singletonList(new Column("pk", INT)), Collections.emptyList(), Collections.singletonList(new Column("v", BLOB)), Lists.newArrayList(new LinkedHashMap<>(ImmutableMap.of("pk", 1, "v", ByteUtils.fromHexString("0xcafebabe")))));
    SimulacronUtils.primeTables(simulacron, new Keyspace("ks1", primedTable));
    String[] unloadArgs = {
        "unload",
        "--connector.csv.header", "false",
        "--connector.csv.delimiter", quoteJson("|"),
        "--connector.csv.url", quoteJson(unloadDir),
        "--connector.csv.maxConcurrentFiles", "1",
        "--schema.keyspace", "ks1",
        "--schema.table", "table1",
        "--codec.binary", format
    };
    ExitStatus exitStatus = new DataStaxBulkLoader(addCommonSettings(unloadArgs)).run();
    assertStatus(exitStatus, STATUS_OK);
    assertThat(readAllLinesInDirectoryAsStream(unloadDir)).containsExactly(expected);
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) EndToEndUtils.primeIpByCountryTable(com.datastax.oss.dsbulk.runner.tests.EndToEndUtils.primeIpByCountryTable) Table(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table) Column(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column) Keyspace(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) LinkedHashMap(java.util.LinkedHashMap) CsvSource(org.junit.jupiter.params.provider.CsvSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 5 with Keyspace

Example of using com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace in the dsbulk project by DataStax.

From the class CSVEndToEndSimulacronIT, method massive_load_errors:

/**
 * Test for DAT-593. Emulates 100 resources with 1000 records each, among which 100 bad records,
 * for a total of 10,000 failed records. Verifies that LogManager is capable of handling a high
 * number of bad records, without disrupting the main load workflow.
 */
@Test
void massive_load_errors() throws Exception {
    // Prime the target table so successful records have somewhere to go.
    SimulacronUtils.primeTables(simulacron, new Keyspace("ks1", new Table("table1", new Column("pk", TEXT), new Column("cc", TEXT), new Column("v", TEXT))));
    // Replace the mock connector's delegate with an in-memory generator of records.
    MockConnector.setDelegate(new CSVConnector() {

        // No-op: this synthetic delegate needs no configuration.
        @Override
        public void configure(@NonNull Config settings, boolean read, boolean retainRecordSources) {
        }

        // No-op: nothing to initialize.
        @Override
        public void init() {
        }

        @Override
        public int readConcurrency() {
            // to force runner to use maximum parallelism
            return Integer.MAX_VALUE;
        }

        // Produces 100 resources of 1000 records each; every 10th record per
        // resource (indices 0, 10, ..., 990) is an unreadable record, i.e.
        // 100 errors per resource and 10,000 errors overall.
        @NonNull
        @Override
        public Publisher<Publisher<Record>> read() {
            List<Publisher<Record>> resources = new ArrayList<>();
            for (int i = 0; i < 100; i++) {
                // Per-resource counter driving the generated record index.
                AtomicInteger counter = new AtomicInteger();
                resources.add(Flux.generate((sink) -> {
                    int next = counter.getAndIncrement();
                    if (next == 1_000) {
                        sink.complete();
                    } else if (next % 10 == 0) {
                        sink.next(RecordUtils.error(new IllegalArgumentException("Record could not be read: " + next)));
                    } else {
                        sink.next(RecordUtils.indexedCSV("pk", String.valueOf(next), "cc", String.valueOf(next), "v", String.valueOf(next)));
                    }
                }));
            }
            return Flux.fromIterable(resources);
        }
    });
    // maxErrors equals the exact expected error count, so the load completes
    // with errors instead of aborting.
    String[] args = { "load", "-c", "mock", "--log.maxErrors", "10000", "--schema.keyspace", "ks1", "--schema.table", "table1" };
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_COMPLETED_WITH_ERRORS);
    assertThat(logs.getAllMessagesAsString()).contains("completed with 10000 errors").contains("Records: total: 100,000, successful: 90,000, failed: 10,000");
    validateExceptionsLog(10_000, "Record could not be read:", "connector-errors.log");
}
Also used : EndToEndUtils.primeIpByCountryTable(com.datastax.oss.dsbulk.runner.tests.EndToEndUtils.primeIpByCountryTable) Table(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table) Config(com.typesafe.config.Config) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) Publisher(org.reactivestreams.Publisher) ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) Column(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Keyspace(com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace) NonNull(edu.umd.cs.findbugs.annotations.NonNull) Record(com.datastax.oss.dsbulk.connectors.api.Record) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ArrayList(java.util.ArrayList) CSVConnector(com.datastax.oss.dsbulk.connectors.csv.CSVConnector) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader)5 ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus)5 Column (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column)5 Keyspace (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace)5 Table (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table)5 EndToEndUtils.primeIpByCountryTable (com.datastax.oss.dsbulk.runner.tests.EndToEndUtils.primeIpByCountryTable)4 Test (org.junit.jupiter.api.Test)4 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)4 Record (com.datastax.oss.dsbulk.connectors.api.Record)2 NonNull (edu.umd.cs.findbugs.annotations.NonNull)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 Publisher (org.reactivestreams.Publisher)2 Session (com.datastax.oss.driver.api.core.session.Session)1 TEXT (com.datastax.oss.driver.api.core.type.DataTypes.TEXT)1 GenericType (com.datastax.oss.driver.api.core.type.reflect.GenericType)1 Resources (com.datastax.oss.driver.shaded.guava.common.io.Resources)1 CommonConnectorFeature (com.datastax.oss.dsbulk.connectors.api.CommonConnectorFeature)1 Connector (com.datastax.oss.dsbulk.connectors.api.Connector)1 ConnectorFeature (com.datastax.oss.dsbulk.connectors.api.ConnectorFeature)1 RecordMetadata (com.datastax.oss.dsbulk.connectors.api.RecordMetadata)1