use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
the class PrometheusEndToEndSimulacronIT method should_exchange_metrics_by_pull_and_push.
@Test
void should_exchange_metrics_by_pull_and_push() throws IOException, InterruptedException {
MockConnector.setDelegate(newConnectorDelegate());
SimulacronUtils.primeTables(
    simulacron,
    new Keyspace(
        "ks1",
        new Table(
            "table1", new Column("pk", TEXT), new Column("cc", TEXT), new Column("v", TEXT))));
String[] args = {
  "load", "-c", "mock",
  "--engine.executionId", "LOAD1",
  "--schema.keyspace", "ks1",
  "--schema.table", "table1",
  "--monitoring.console", "false",
  "--monitoring.prometheus.push.enabled", "true",
  "--monitoring.prometheus.pull.enabled", "true",
  "--monitoring.prometheus.labels", "{ foo = bar }",
  "--monitoring.prometheus.job", "job1",
  "--monitoring.prometheus.push.groupBy.instance", "true",
  "--monitoring.prometheus.push.url", "http://" + gateway.getHost() + ":" + gateway.getMappedPort(9091),
  "--monitoring.trackBytes", "true",
  "--driver.advanced.metrics.node.enabled", "pool.open-connections"
};
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
assertThat(logs).hasMessageContaining("Prometheus Metrics HTTP server listening on 0.0.0.0:8080");
// Give some time for Prometheus to scrape the gateway
Thread.sleep(7_000);
// assert that metrics were pulled directly from DSBulk
URL prometheusPullQuery =
    new URL(
        "http",
        "localhost",
        prometheus.getMappedPort(9090),
        "/api/v1/query?query=dsbulk_records_total"
            + "{instance=\"host.testcontainers.internal:8080\"}[5m]");
assertThat(Resources.toString(prometheusPullQuery, StandardCharsets.UTF_8))
    .contains("\"status\":\"success\"")
    .contains("dsbulk_records_total")
    .contains("\"application_name\":\"DataStax Bulk Loader LOAD1\"")
    .contains("\"application_version\":\"" + WorkflowUtils.getBulkLoaderVersion() + "\"")
    .contains("\"client_id\":\"fc93e4ac-7fa5-394f-814e-21b735d04c10\"")
    .contains("\"driver_version\":\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\"")
    .contains("\"instance\":\"host.testcontainers.internal:8080\"")
    .contains("\"job\":\"dsbulk\"")
    .contains("\"exported_job\":\"job1\"")
    .contains("\"foo\":\"bar\"")
    .contains("\"operation_id\":\"LOAD1\"");
URL prometheusDriverPullQuery =
    new URL(
        "http",
        "localhost",
        prometheus.getMappedPort(9090),
        "/api/v1/query?query=dsbulk_driver_nodes_pool_open_connections"
            + "{instance=\"host.testcontainers.internal:8080\"}[5m]");
assertThat(Resources.toString(prometheusDriverPullQuery, StandardCharsets.UTF_8))
    .contains("\"status\":\"success\"")
    .contains("dsbulk_driver_nodes_pool_open_connections")
    .contains("\"node\":\"" + hostname + ":" + port + "\"")
    .contains("\"application_name\":\"DataStax Bulk Loader LOAD1\"")
    .contains("\"application_version\":\"" + WorkflowUtils.getBulkLoaderVersion() + "\"")
    .contains("\"client_id\":\"fc93e4ac-7fa5-394f-814e-21b735d04c10\"")
    .contains("\"driver_version\":\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\"")
    .contains("\"instance\":\"host.testcontainers.internal:8080\"")
    .contains("\"job\":\"dsbulk\"")
    .contains("\"exported_job\":\"job1\"")
    .contains("\"foo\":\"bar\"")
    .contains("\"operation_id\":\"LOAD1\"");
// assert that metrics were pushed to the gateway
URL gatewayQuery = new URL("http", "localhost", gateway.getMappedPort(9091), "/metrics");
String labelsAndValue =
    "\\{"
        + "application_name=\"DataStax Bulk Loader LOAD1\","
        + "application_version=\"" + WorkflowUtils.getBulkLoaderVersion() + "\","
        + "client_id=\"fc93e4ac-7fa5-394f-814e-21b735d04c10\","
        + "driver_version=\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\","
        + "foo=\"bar\","
        + "instance=\".+\","
        + "job=\"job1\","
        + "operation_id=\"LOAD1\"} .+";
assertThat(Resources.readLines(gatewayQuery, StandardCharsets.UTF_8))
    .anySatisfy(line -> assertThat(line).matches("dsbulk_success" + labelsAndValue))
    .anySatisfy(line -> assertThat(line).matches("dsbulk_last_success" + labelsAndValue))
    .anySatisfy(line -> assertThat(line).matches("dsbulk_records_total" + labelsAndValue))
    .allSatisfy(line -> assertThat(line).doesNotContain("dsbulk_driver_nodes_pool_open_connections"));
// assert that Prometheus scraped DSBulk metrics from the gateway
URL prometheusPullGatewayQuery =
    new URL(
        "http",
        "localhost",
        prometheus.getMappedPort(9090),
        "/api/v1/query?query=dsbulk_records_total{instance=\"gateway:9091\"}[5m]");
assertThat(Resources.toString(prometheusPullGatewayQuery, StandardCharsets.UTF_8))
    .contains("\"status\":\"success\"")
    .contains("dsbulk_records_total")
    .contains("\"application_name\":\"DataStax Bulk Loader LOAD1\"")
    .contains("\"application_version\":\"" + WorkflowUtils.getBulkLoaderVersion() + "\"")
    .contains("\"client_id\":\"fc93e4ac-7fa5-394f-814e-21b735d04c10\"")
    .contains("\"driver_version\":\"" + Session.OSS_DRIVER_COORDINATES.getVersion() + "\"")
    .contains("\"instance\":\"gateway:9091\"")
    .contains("\"job\":\"gateway\"")
    .contains("\"exported_job\":\"job1\"")
    .contains("\"foo\":\"bar\"")
    .contains("\"operation_id\":\"LOAD1\"");
// assert that driver metrics did not reach Prometheus through the gateway
URL prometheusDriverPullGatewayQuery =
    new URL(
        "http",
        "localhost",
        prometheus.getMappedPort(9090),
        "/api/v1/query?query=dsbulk_driver_nodes_pool_open_connections{instance=\"gateway:9091\"}[5m]");
assertThat(Resources.toString(prometheusDriverPullGatewayQuery, StandardCharsets.UTF_8))
    .isEqualTo("{\"status\":\"success\",\"data\":{\"resultType\":\"matrix\",\"result\":[]}}");
}
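The three pull-side checks above repeat the same pattern: build a Prometheus instant-query URL against the mapped container port, fetch it with Guava's Resources, and verify the response. A small helper could factor that out; the sketch below is hypothetical (queryPrometheus is not part of the test) and assumes the same Guava and AssertJ imports already used here.
// Hypothetical helper: runs a PromQL instant query against the Prometheus container
// and asserts that the HTTP API reports success before returning the raw JSON body.
private static String queryPrometheus(int mappedPort, String promQl) throws IOException {
  URL url = new URL("http", "localhost", mappedPort, "/api/v1/query?query=" + promQl);
  String body = Resources.toString(url, StandardCharsets.UTF_8);
  assertThat(body).contains("\"status\":\"success\"");
  return body;
}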
use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
the class CSVConnectorEndToEndCCMIT method temporal_roundtrip_numeric.
/**
* Test for DAT-364 (numeric timestamps) and DAT-428 (numeric dates and times).
*/
@Test
void temporal_roundtrip_numeric() throws Exception {
assumeTrue(ccm.getCassandraVersion().compareTo(V3) >= 0, "CQL type date is not compatible with C* < 3.0");
session.execute("DROP TABLE IF EXISTS temporals");
session.execute("CREATE TABLE IF NOT EXISTS temporals (key int PRIMARY KEY, vdate date, vtime time, vtimestamp timestamp)");
List<String> args = new ArrayList<>();
args.add("load");
args.add("--connector.csv.ignoreLeadingWhitespaces");
args.add("true");
args.add("--connector.csv.ignoreTrailingWhitespaces");
args.add("true");
args.add("--connector.csv.url");
args.add(ClassLoader.getSystemResource("temporal-numeric.csv").toExternalForm());
args.add("--connector.csv.header");
args.add("true");
args.add("--codec.timeZone");
args.add("Europe/Paris");
args.add("--codec.date");
args.add("UNITS_SINCE_EPOCH");
args.add("--codec.time");
args.add("UNITS_SINCE_EPOCH");
args.add("--codec.timestamp");
args.add("UNITS_SINCE_EPOCH");
args.add("--codec.unit");
args.add("MINUTES");
args.add("--codec.epoch");
args.add("2000-01-01T00:00:00+01:00[Europe/Paris]");
args.add("--schema.keyspace");
args.add(session.getKeyspace().get().asInternal());
args.add("--schema.table");
args.add("temporals");
args.add("--schema.mapping");
args.add("*=*");
ExitStatus loadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(loadStatus, STATUS_OK);
checkNumericTemporalsWritten(session);
FileUtils.deleteDirectory(logDir);
args = new ArrayList<>();
args.add("unload");
args.add("--connector.csv.url");
args.add(quoteJson(unloadDir));
args.add("--connector.csv.header");
args.add("false");
args.add("--connector.csv.delimiter");
args.add(";");
args.add("--codec.timeZone");
args.add("Europe/Paris");
args.add("--codec.date");
args.add("UNITS_SINCE_EPOCH");
args.add("--codec.time");
args.add("UNITS_SINCE_EPOCH");
args.add("--codec.timestamp");
args.add("UNITS_SINCE_EPOCH");
args.add("--codec.unit");
args.add("MINUTES");
args.add("--codec.epoch");
args.add("2000-01-01T00:00:00+01:00[Europe/Paris]");
args.add("--connector.csv.maxConcurrentFiles");
args.add("1");
args.add("--schema.keyspace");
args.add(session.getKeyspace().get().asInternal());
args.add("--schema.query");
args.add("SELECT key, vdate, vtime, vtimestamp FROM temporals");
ExitStatus unloadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(unloadStatus, STATUS_OK);
checkNumericTemporalsRead(unloadDir);
FileUtils.deleteDirectory(logDir);
// check we can load from the unloaded dataset
args = new ArrayList<>();
args.add("load");
args.add("--connector.csv.url");
args.add(quoteJson(unloadDir));
args.add("--connector.csv.header");
args.add("false");
args.add("--connector.csv.delimiter");
args.add(";");
args.add("--codec.timeZone");
args.add("Europe/Paris");
args.add("--codec.date");
args.add("UNITS_SINCE_EPOCH");
args.add("--codec.time");
args.add("UNITS_SINCE_EPOCH");
args.add("--codec.timestamp");
args.add("UNITS_SINCE_EPOCH");
args.add("--codec.unit");
args.add("MINUTES");
args.add("--codec.epoch");
args.add("2000-01-01T00:00:00+01:00[Europe/Paris]");
args.add("--schema.keyspace");
args.add(session.getKeyspace().get().asInternal());
args.add("--schema.table");
args.add("temporals");
args.add("--schema.mapping");
args.add("key, vdate, vtime, vtimestamp");
loadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(loadStatus, STATUS_OK);
checkNumericTemporalsWritten(session);
}
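With --codec.unit MINUTES and --codec.epoch 2000-01-01T00:00:00+01:00[Europe/Paris], numeric values in the CSV are interpreted as whole minutes elapsed since that epoch. The java.time sketch below illustrates the conversion; the value 60 is an arbitrary example, not taken from temporal-numeric.csv.
// Illustration only (assumes java.time imports): 60 minutes after the configured epoch.
// The epoch instant is 1999-12-31T23:00:00Z, so 60 minutes later is 2000-01-01T00:00:00Z.
ZonedDateTime epoch = ZonedDateTime.parse("2000-01-01T00:00:00+01:00[Europe/Paris]");
Instant converted = epoch.plusMinutes(60).toInstant();
System.out.println(converted); // 2000-01-01T00:00:00Z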
use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
the class CSVConnectorEndToEndCCMIT method delete_column_with_mapping.
@Test
void delete_column_with_mapping() {
session.execute("DROP TABLE IF EXISTS test_delete");
session.execute("CREATE TABLE IF NOT EXISTS test_delete (\"PK\" int, cc int, value int, PRIMARY KEY (\"PK\", cc))");
session.execute("INSERT INTO test_delete (\"PK\", cc, value) VALUES (1,1,1)");
session.execute("INSERT INTO test_delete (\"PK\", cc, value) VALUES (1,2,2)");
MockConnector.mockReads(RecordUtils.indexedCSV("1", "1", ""));
List<String> args = new ArrayList<>();
args.add("load");
args.add("--connector.name");
args.add("mock");
args.add("--schema.keyspace");
args.add(session.getKeyspace().get().asInternal());
args.add("--schema.table");
args.add("test_delete");
args.add("--schema.mapping");
args.add("0=PK,1=cc,2=value");
args.add("--schema.nullToUnset");
args.add("false");
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
ResultSet rs1 = session.execute("SELECT value FROM test_delete WHERE \"PK\" = 1 AND cc = 1");
Row row1 = rs1.one();
assertThat(row1).isNotNull();
assertThat(row1.isNull(0)).isTrue();
ResultSet rs2 = session.execute("SELECT value FROM test_delete WHERE \"PK\" = 1 AND cc = 2");
Row row2 = rs2.one();
assertThat(row2).isNotNull();
assertThat(row2.isNull(0)).isFalse();
}
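The point of --schema.nullToUnset false here is that the empty third field of the mocked record is bound as a CQL null rather than left unset, which deletes the existing cell for (PK=1, cc=1) while the row with cc=2 keeps its value. A rough driver-level equivalent of that record is sketched below; it is not part of the test and assumes the driver's SimpleStatement is imported.
// Binding null overwrites (deletes) the cell; an unset value would leave it intact.
session.execute(
    SimpleStatement.newInstance(
        "INSERT INTO test_delete (\"PK\", cc, value) VALUES (?, ?, ?)", 1, 1, null));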
use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
the class CSVConnectorEndToEndCCMIT method unload_load_preserving_ttl_and_timestamp_custom_mapping_constant_expressions.
@Test
void unload_load_preserving_ttl_and_timestamp_custom_mapping_constant_expressions() throws IOException {
assumeTrue(ccm.getCassandraVersion().compareTo(V3_11_5) >= 0, "Literal selectors are supported starting with C* 3.11.5, see CASSANDRA-9243");
session.execute("DROP TABLE IF EXISTS preserve_ttl_timestamp");
session.execute("CREATE TABLE preserve_ttl_timestamp (pk1 int, pk2 int, cc1 int, cc2 int, v1 int, v2 int, PRIMARY KEY ((pk1, pk2), cc1, cc2))");
session.execute("BEGIN BATCH " + "INSERT INTO preserve_ttl_timestamp (pk1, pk2, cc1, cc2, v1) " + "VALUES (1, 2, 3, 4, 100) " + "USING TIMESTAMP 1111 AND TTL 111111; " + "INSERT INTO preserve_ttl_timestamp (pk1, pk2, cc1, cc2, v2) " + "VALUES (1, 2, 3, 4, 200) " + // no TTL
"USING TIMESTAMP 2222; " + "APPLY BATCH");
List<String> args = Lists.newArrayList("unload", "--log.directory", quoteJson(logDir), "--connector.csv.url", quoteJson(unloadDir), "--connector.csv.header", "true", "--codec.nullStrings", "NULL", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.table", "preserve_ttl_timestamp", "-timestamp", "true", "-ttl", "true");
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
Stream<String> line = FileUtils.readAllLinesInDirectoryAsStreamExcludingHeaders(unloadDir);
assertThat(line)
    .singleElement(InstanceOfAssertFactories.STRING)
    .contains("1,2,3,4,")
    .contains("100")
    .contains("200")
    .containsPattern("1970-01-01T00:00:00\\.001111Z,111\\d\\d\\d")
    .containsPattern("1970-01-01T00:00:00\\.002222Z,NULL"); // no TTL
assertThat(logs).doesNotHaveMessageContaining("Skipping timestamp preservation").doesNotHaveMessageContaining("Skipping TTL preservation");
FileUtils.deleteDirectory(logDir);
logs.clear();
session.execute("TRUNCATE preserve_ttl_timestamp");
args =
    Lists.newArrayList(
        "load",
        "--log.directory", quoteJson(logDir),
        "--connector.csv.url", quoteJson(unloadDir),
        "--connector.csv.header", "true",
        "--schema.keyspace", session.getKeyspace().get().asInternal(),
        "--schema.table", "preserve_ttl_timestamp",
        "--schema.mapping", "*=*, (int)222222 = ttl(v2)",
        "-timestamp", "true",
        "-ttl", "true");
status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
ResultSet rs =
    session.execute(
        "SELECT pk1, pk2, cc1, cc2, "
            + "v1, writetime(v1) AS v1w, ttl(v1) as v1t, "
            + "v2, writetime(v2) AS v2w, ttl(v2) as v2t "
            + "FROM preserve_ttl_timestamp "
            + "WHERE pk1 = 1 AND pk2 = 2 AND cc1 = 3 AND cc2 = 4");
Row row = rs.one();
assertThat(row).isNotNull();
assertThat(row.getInt("pk1")).isEqualTo(1);
assertThat(row.getInt("pk2")).isEqualTo(2);
assertThat(row.getInt("cc1")).isEqualTo(3);
assertThat(row.getInt("cc2")).isEqualTo(4);
assertThat(row.getObject("v1")).isEqualTo(100);
assertThat(row.getObject("v2")).isEqualTo(200);
assertThat(row.getLong("v1w")).isEqualTo(1111L);
assertThat(row.getLong("v2w")).isEqualTo(2222L);
assertThat(row.getInt("v1t")).isLessThanOrEqualTo(111111).isGreaterThan(111000);
assertThat(row.getInt("v2t")).isLessThanOrEqualTo(222222).isGreaterThan(222000);
assertThat(logs).doesNotHaveMessageContaining("Skipping timestamp preservation").doesNotHaveMessageContaining("Skipping TTL preservation");
}
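The containsPattern assertions in the unload step rely on writetime values being exported as microsecond-precision instants. As a quick illustration (not part of the test, assuming java.time imports), the writetime of 1111 microseconds set by the batch renders exactly as the instant matched by the first pattern:
// Converting a writetime in microseconds since the epoch to its unloaded instant form.
Instant writetime = Instant.EPOCH.plus(1111, ChronoUnit.MICROS);
System.out.println(writetime); // 1970-01-01T00:00:00.001111Z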
use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
the class CSVConnectorEndToEndCCMIT method load_user_defined_functions_custom_query.
@Test
void load_user_defined_functions_custom_query() {
assumeTrue(ccm.getCassandraVersion().compareTo(V2_2) >= 0, "User-defined functions are not compatible with C* < 2.2");
session.execute("DROP TABLE IF EXISTS udf_table");
session.execute("CREATE TABLE udf_table (pk int PRIMARY KEY, \"Value 1\" int, \"Value 2\" int, \"SUM\" int)");
session.execute("DROP FUNCTION IF EXISTS plus");
session.execute("CREATE FUNCTION plus(s int, v int) RETURNS NULL ON NULL INPUT RETURNS int LANGUAGE java AS 'return s+v;';");
MockConnector.mockReads(RecordUtils.mappedCSV("pk", "0", "Value 1", "1", "Value 2", "2"));
List<String> args =
    Lists.newArrayList(
        "load",
        "--log.directory", quoteJson(logDir),
        "--connector.name", "mock",
        "--connector.csv.maxConcurrentFiles", "1",
        "--schema.keyspace", session.getKeyspace().get().asInternal(),
        "--schema.query",
        quoteJson(
            "INSERT INTO udf_table "
                + "(pk, \"Value 1\", \"Value 2\", \"SUM\") "
                + "VALUES "
                + "(:pk, :\"Value 1\", :\"Value 2\", plus(:\"Value 1\", :\"Value 2\"))"));
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
Row row = session.execute("SELECT * FROM udf_table").one();
assertThat(row.getInt("pk")).isEqualTo(0);
assertThat(row.getInt("\"Value 1\"")).isEqualTo(1);
assertThat(row.getInt("\"Value 2\"")).isEqualTo(2);
assertThat(row.getInt("\"SUM\"")).isEqualTo(3);
}
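Because the "SUM" column is computed by the plus() UDF inside the mapped INSERT, a direct driver query can double-check the function itself. This is only a sketch, not part of the test:
// Sanity check of the UDF: applying plus() to the two loaded columns should yield 3.
Row sum = session.execute(
    "SELECT plus(\"Value 1\", \"Value 2\") AS s FROM udf_table WHERE pk = 0").one();
assertThat(sum.getInt("s")).isEqualTo(3);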
Aggregations