Use of com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column in project dsbulk by datastax.
The class JsonEndToEndSimulacronIT, method full_unload_custom_features.
@Test
void full_unload_custom_features() throws Exception {
  List<LinkedHashMap<String, Object>> rows = new ArrayList<>();
  LinkedHashMap<String, Object> row = new LinkedHashMap<>();
  row.put("pk", 1);
  row.put("c1", null);
  rows.add(row);
  SimulacronUtils.primeTables(
      simulacron,
      new SimulacronUtils.Keyspace(
          "ks1",
          new Table(
              "table1",
              singletonList(new Column("pk", INT)),
              emptyList(),
              singletonList(new Column("c1", TEXT)),
              rows)));
  String[] args = {
    "unload",
    "-c", "json",
    "--connector.json.url", StringUtils.quoteJson(unloadDir),
    "--connector.json.maxConcurrentFiles", "1",
    "--schema.query", "SELECT pk, c1 FROM ks1.table1",
    "--connector.json.generatorFeatures", "{QUOTE_FIELD_NAMES = false}",
    "--connector.json.serializationStrategy", "NON_NULL"
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateQueryCount(
      simulacron,
      1,
      "SELECT pk, c1 FROM ks1.table1 WHERE token(pk) > :start AND token(pk) <= :end",
      LOCAL_ONE);
  validateOutputFiles(1, unloadDir);
  Optional<String> line = FileUtils.readAllLinesInDirectoryAsStream(unloadDir).findFirst();
  assertThat(line).isPresent().hasValue("{pk:1}");
}
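The expected line {pk:1} follows directly from the two custom settings: disabling the generator feature QUOTE_FIELD_NAMES emits field names unquoted, and the NON_NULL serialization strategy drops the null c1 column. Below is a minimal standalone sketch of the equivalent Jackson configuration, assuming jackson-databind 2.9+ on the classpath (where the NON_NULL default also suppresses null map values); it is illustrative only, not DSBulk's internal wiring.

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.LinkedHashMap;
import java.util.Map;

public class UnquotedNonNullJsonSketch {
  public static void main(String[] args) throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    // Equivalent of --connector.json.generatorFeatures {QUOTE_FIELD_NAMES = false}
    mapper.getFactory().disable(JsonGenerator.Feature.QUOTE_FIELD_NAMES);
    // Equivalent of --connector.json.serializationStrategy NON_NULL
    mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
    Map<String, Object> row = new LinkedHashMap<>();
    row.put("pk", 1);
    row.put("c1", null); // suppressed by NON_NULL
    System.out.println(mapper.writeValueAsString(row)); // prints {pk:1}
  }
}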
Use of com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column in project dsbulk by datastax.
The class PrometheusEndToEndSimulacronIT, method should_exchange_metrics_by_pull_and_push.
@Test
void should_exchange_metrics_by_pull_and_push() throws IOException, InterruptedException {
  MockConnector.setDelegate(newConnectorDelegate());
  SimulacronUtils.primeTables(
      simulacron,
      new Keyspace(
          "ks1",
          new Table(
              "table1", new Column("pk", TEXT), new Column("cc", TEXT), new Column("v", TEXT))));
  String[] args = {
    "load",
    "-c", "mock",
    "--engine.executionId", "LOAD1",
    "--schema.keyspace", "ks1",
    "--schema.table", "table1",
    "--monitoring.console", "false",
    "--monitoring.prometheus.push.enabled", "true",
    "--monitoring.prometheus.pull.enabled", "true",
    "--monitoring.prometheus.labels", "{ foo = bar }",
    "--monitoring.prometheus.job", "job1",
    "--monitoring.prometheus.push.groupBy.instance", "true",
    "--monitoring.prometheus.push.url",
    "http://" + gateway.getHost() + ":" + gateway.getMappedPort(9091),
    "--monitoring.trackBytes", "true",
    "--driver.advanced.metrics.node.enabled", "pool.open-connections"
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  assertThat(logs)
      .hasMessageContaining("Prometheus Metrics HTTP server listening on 0.0.0.0:8080");
  // Give Prometheus some time to scrape the gateway
  Thread.sleep(7_000);
  // Assert that metrics were pulled directly from DSBulk
  URL prometheusPullQuery =
      new URL(
          "http",
          "localhost",
          prometheus.getMappedPort(9090),
          "/api/v1/query?query=dsbulk_records_total"
              + "{instance=\"host.testcontainers.internal:8080\"}[5m]");
  assertThat(Resources.toString(prometheusPullQuery, StandardCharsets.UTF_8))
      .contains("\"status\":\"success\"")
      .contains("dsbulk_records_total")
      .contains("\"application_name\":\"DataStax Bulk Loader LOAD1\"")
      .contains("\"application_version\":\"" + WorkflowUtils.getBulkLoaderVersion() + "\"")
      .contains("\"client_id\":\"fc93e4ac-7fa5-394f-814e-21b735d04c10\"")
      .contains("\"driver_version\":\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\"")
      .contains("\"instance\":\"host.testcontainers.internal:8080\"")
      .contains("\"job\":\"dsbulk\"")
      .contains("\"exported_job\":\"job1\"")
      .contains("\"foo\":\"bar\"")
      .contains("\"operation_id\":\"LOAD1\"");
  URL prometheusDriverPullQuery =
      new URL(
          "http",
          "localhost",
          prometheus.getMappedPort(9090),
          "/api/v1/query?query=dsbulk_driver_nodes_pool_open_connections"
              + "{instance=\"host.testcontainers.internal:8080\"}[5m]");
  assertThat(Resources.toString(prometheusDriverPullQuery, StandardCharsets.UTF_8))
      .contains("\"status\":\"success\"")
      .contains("dsbulk_driver_nodes_pool_open_connections")
      .contains("\"node\":\"" + hostname + ":" + port + "\"")
      .contains("\"application_name\":\"DataStax Bulk Loader LOAD1\"")
      .contains("\"application_version\":\"" + WorkflowUtils.getBulkLoaderVersion() + "\"")
      .contains("\"client_id\":\"fc93e4ac-7fa5-394f-814e-21b735d04c10\"")
      .contains("\"driver_version\":\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\"")
      .contains("\"instance\":\"host.testcontainers.internal:8080\"")
      .contains("\"job\":\"dsbulk\"")
      .contains("\"exported_job\":\"job1\"")
      .contains("\"foo\":\"bar\"")
      .contains("\"operation_id\":\"LOAD1\"");
  // Assert that metrics were pushed to the gateway
  URL gatewayQuery = new URL("http", "localhost", gateway.getMappedPort(9091), "/metrics");
  String labelsAndValue =
      "\\{"
          + "application_name=\"DataStax Bulk Loader LOAD1\","
          + "application_version=\"" + WorkflowUtils.getBulkLoaderVersion() + "\","
          + "client_id=\"fc93e4ac-7fa5-394f-814e-21b735d04c10\","
          + "driver_version=\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\","
          + "foo=\"bar\","
          + "instance=\".+\","
          + "job=\"job1\","
          + "operation_id=\"LOAD1\"} .+";
  assertThat(Resources.readLines(gatewayQuery, StandardCharsets.UTF_8))
      .anySatisfy(line -> assertThat(line).matches("dsbulk_success" + labelsAndValue))
      .anySatisfy(line -> assertThat(line).matches("dsbulk_last_success" + labelsAndValue))
      .anySatisfy(line -> assertThat(line).matches("dsbulk_records_total" + labelsAndValue))
      .allSatisfy(
          line -> assertThat(line).doesNotContain("dsbulk_driver_nodes_pool_open_connections"));
  // Assert that Prometheus scraped DSBulk metrics from the gateway
  URL prometheusPullGatewayQuery =
      new URL(
          "http",
          "localhost",
          prometheus.getMappedPort(9090),
          "/api/v1/query?query=dsbulk_records_total{instance=\"gateway:9091\"}[5m]");
  assertThat(Resources.toString(prometheusPullGatewayQuery, StandardCharsets.UTF_8))
      .contains("\"status\":\"success\"")
      .contains("dsbulk_records_total")
      .contains("\"application_name\":\"DataStax Bulk Loader LOAD1\"")
      .contains("\"application_version\":\"" + WorkflowUtils.getBulkLoaderVersion() + "\"")
      .contains("\"client_id\":\"fc93e4ac-7fa5-394f-814e-21b735d04c10\"")
      .contains("\"driver_version\":\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\"")
      .contains("\"instance\":\"gateway:9091\"")
      .contains("\"job\":\"gateway\"")
      .contains("\"exported_job\":\"job1\"")
      .contains("\"foo\":\"bar\"")
      .contains("\"operation_id\":\"LOAD1\"");
  // Assert that driver metrics did not arrive at Prometheus through the gateway
  URL prometheusDriverPullGatewayQuery =
      new URL(
          "http",
          "localhost",
          prometheus.getMappedPort(9090),
          "/api/v1/query?query=dsbulk_driver_nodes_pool_open_connections{instance=\"gateway:9091\"}[5m]");
  assertThat(Resources.toString(prometheusDriverPullGatewayQuery, StandardCharsets.UTF_8))
      .isEqualTo("{\"status\":\"success\",\"data\":{\"resultType\":\"matrix\",\"result\":[]}}");
}
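The pull-side assertions are plain Prometheus instant queries over the HTTP API, with a range selector appended to the metric name. A standalone sketch of the same kind of request follows, with a hypothetical localhost:9090 address standing in for prometheus.getMappedPort(9090); unlike the test, the selector is URL-encoded here for safety.

import com.google.common.io.Resources;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;

public class PrometheusQuerySketch {
  public static void main(String[] args) throws Exception {
    // Instant query with a 5-minute range selector, as in the test.
    String selector = "dsbulk_records_total{instance=\"host.testcontainers.internal:8080\"}[5m]";
    URL query =
        new URL(
            "http://localhost:9090/api/v1/query?query="
                + URLEncoder.encode(selector, StandardCharsets.UTF_8.name()));
    // A successful response is a JSON body starting with {"status":"success",...}
    System.out.println(Resources.toString(query, StandardCharsets.UTF_8));
  }
}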
Use of com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column in project dsbulk by datastax.
The class CSVEndToEndSimulacronIT, method error_load_missing_field.
@Test
void error_load_missing_field() throws Exception {
  SimulacronUtils.primeTables(
      simulacron,
      new Keyspace(
          "ks1",
          new Table(
              "table1",
              new Column("a", INT),
              new Column("b", TEXT),
              new Column("c", BOOLEAN),
              new Column("d", INT))));
  String[] args = {
    "load",
    "--log.maxErrors", "2",
    "--log.verbosity", "2",
    "-header", "true",
    "--connector.csv.url", StringUtils.quoteJson(getClass().getResource("/missing-extra.csv")),
    "--schema.query", "INSERT INTO ks1.table1 (a, b, c, d) VALUES (:a, :b, :c, :d)",
    "--schema.mapping", "A = a, B = b, C = c, D = d",
    "--schema.allowMissingFields", "false"
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_ABORTED_TOO_MANY_ERRORS);
  assertThat(logs.getAllMessagesAsString())
      .contains("aborted: Too many errors, the maximum allowed is 2")
      .contains("Records: total: 3, successful: 0, failed: 3");
  validateNumberOfBadRecords(3);
  validateExceptionsLog(
      3, "Required field D (mapped to column d) was missing from record", "mapping-errors.log");
}
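Note the interplay between --log.maxErrors 2 and the final counts: the run aborts as soon as the error count exceeds the threshold, which here happens on the third failed record, so the exit status is STATUS_ABORTED_TOO_MANY_ERRORS while all three bad records still end up in the bad-records file and in mapping-errors.log.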
Use of com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column in project dsbulk by datastax.
The class CSVEndToEndSimulacronIT, method error_load_extra_field.
@Test
void error_load_extra_field() throws Exception {
  SimulacronUtils.primeTables(
      simulacron,
      new Keyspace("ks1", new Table("table1", new Column("a", INT), new Column("b", TEXT))));
  String[] args = {
    "load",
    "--log.maxErrors", "2",
    "--log.verbosity", "2",
    "-header", "true",
    "--connector.csv.url", StringUtils.quoteJson(getClass().getResource("/missing-extra.csv")),
    "--schema.query", "INSERT INTO ks1.table1 (a, b) VALUES (:a, :b)",
    "--schema.mapping", "A = a, B = b",
    "--schema.allowExtraFields", "false"
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_ABORTED_TOO_MANY_ERRORS);
  assertThat(logs.getAllMessagesAsString())
      .contains("aborted: Too many errors, the maximum allowed is 2")
      .contains("Records: total: 3, successful: 0, failed: 3");
  validateNumberOfBadRecords(3);
  validateExceptionsLog(3, "Extraneous field C was found in record", "mapping-errors.log");
}
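Both tests read the same fixture, and its shape can be inferred from the assertions: /missing-extra.csv evidently has a header row with the fields A, B and C, followed by three data records. Mapping A through D with allowMissingFields false therefore fails every record on the absent field D, while mapping only A and B with allowExtraFields false fails every record on the extraneous field C.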
Use of com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column in project dsbulk by datastax.
The class CSVEndToEndSimulacronIT, method unload_binary.
@ParameterizedTest
@CsvSource({"BASE64,1|yv66vg==", "HEX,1|0xcafebabe"})
void unload_binary(String format, String expected) throws Exception {
  SimulacronUtils.primeTables(
      simulacron,
      new Keyspace(
          "ks1",
          new Table(
              "table1",
              Collections.singletonList(new Column("pk", INT)),
              Collections.emptyList(),
              Collections.singletonList(new Column("v", BLOB)),
              Lists.newArrayList(
                  new LinkedHashMap<>(
                      ImmutableMap.of("pk", 1, "v", ByteUtils.fromHexString("0xcafebabe")))))));
  String[] args = {
    "unload",
    "--connector.csv.header", "false",
    "--connector.csv.delimiter", quoteJson("|"),
    "--connector.csv.url", quoteJson(unloadDir),
    "--connector.csv.maxConcurrentFiles", "1",
    "--schema.keyspace", "ks1",
    "--schema.table", "table1",
    "--codec.binary", format
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  assertThat(readAllLinesInDirectoryAsStream(unloadDir)).containsExactly(expected);
}
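The two expected encodings of the blob 0xcafebabe can be verified with the JDK alone; a minimal sketch follows (the class name is made up for illustration and the hex rendering is hand-rolled; DSBulk's actual codec classes are not shown).

import java.util.Base64;

public class BinaryEncodingSketch {
  public static void main(String[] args) {
    byte[] blob = {(byte) 0xca, (byte) 0xfe, (byte) 0xba, (byte) 0xbe};
    // --codec.binary BASE64 -> yv66vg==
    System.out.println(Base64.getEncoder().encodeToString(blob));
    // --codec.binary HEX -> 0xcafebabe
    StringBuilder hex = new StringBuilder("0x");
    for (byte b : blob) {
      hex.append(String.format("%02x", b));
    }
    System.out.println(hex);
  }
}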