Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by DataStax.
From the class JsonEndToEndSimulacronIT, method full_load_dry_run.
@Test
void full_load_dry_run() {
  primeIpByCountryTable(simulacron);
  RequestPrime insert = createSimpleParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY);
  simulacron.prime(new Prime(insert));
  String[] args = {
    "load",
    "-c", "json",
    "--connector.json.url", StringUtils.quoteJson(JsonUtils.JSON_RECORDS_UNIQUE),
    "-dryRun", "true",
    "--schema.keyspace", "ks1",
    "--schema.query", INSERT_INTO_IP_BY_COUNTRY,
    "--schema.mapping", IP_BY_COUNTRY_MAPPING_NAMED
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateQueryCount(simulacron, 0, "INSERT INTO ip_by_country", ONE);
}
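The pattern in these tests (build the argument array, hand it to DataStaxBulkLoader, run it, and inspect the returned ExitStatus) works the same way outside a test harness. Below is a minimal sketch of a standalone programmatic load; the contact point, keyspace, table, and file path are placeholder assumptions, and the test-only addCommonSettings helper is omitted.

import com.datastax.oss.dsbulk.runner.DataStaxBulkLoader;
import com.datastax.oss.dsbulk.runner.ExitStatus;

public class ProgrammaticLoadSketch {

  public static void main(String[] cliArgs) {
    // Placeholder settings: adjust the contact point, file path, keyspace and table
    // to your environment before running.
    String[] args = {
      "load",
      "-h", "127.0.0.1",
      "-url", "/tmp/export.csv",
      "-k", "ks1",
      "-t", "ip_by_country"
    };
    // Same invocation style as the tests above; ExitStatus reports how the operation ended.
    ExitStatus status = new DataStaxBulkLoader(args).run();
    System.out.println("dsbulk finished with status: " + status);
    if (status != ExitStatus.STATUS_OK) {
      System.exit(1);
    }
  }
}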
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by DataStax.
From the class GraphEndToEndCCMIT, method json_full_load_unload_and_load_again_edges.
@Test
void json_full_load_unload_and_load_again_edges() throws Exception {
  // Load Customer Order data
  List<String> args =
      Lists.newArrayList(
          "load",
          "-g", FRAUD_GRAPH,
          "-e", PLACES_EDGE_LABEL,
          "-from", CUSTOMER_VERTEX_LABEL,
          "-to", ORDER_VERTEX_LABEL,
          "-url", StringUtils.quoteJson(JSON_CUSTOMER_ORDER_RECORDS),
          "-m", CUSTOMER_ORDER_MAPPINGS,
          "--connector.name", "json");
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateResultSetSize(14, SELECT_ALL_CUSTOMER_ORDERS);
  TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
  assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
  logs.clear();
  stderr.clear();
  GraphResultSet results =
      session.execute(
          ScriptGraphStatement.newInstance("g.E().hasLabel('" + PLACES_EDGE_LABEL + "')"));
  assertThat(results).hasSize(14);
  FileUtils.deleteDirectory(logDir);
  // Unload customer order data
  args =
      Lists.newArrayList(
          "unload",
          "-g", FRAUD_GRAPH,
          "-e", PLACES_EDGE_LABEL,
          "-from", CUSTOMER_VERTEX_LABEL,
          "-to", ORDER_VERTEX_LABEL,
          "--connector.name", "json",
          "-url", quoteJson(unloadDir),
          "-m", CUSTOMER_ORDER_MAPPINGS,
          "--connector.json.maxConcurrentFiles", "1");
  status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateOutputFiles(14, unloadDir);
  TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
  assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
  logs.clear();
  stderr.clear();
  // Remove data for reload validation
  truncateTables();
  // Reload Customer Order data
  args =
      Lists.newArrayList(
          "load",
          "-g", FRAUD_GRAPH,
          "-e", PLACES_EDGE_LABEL,
          "-from", CUSTOMER_VERTEX_LABEL,
          "-to", ORDER_VERTEX_LABEL,
          "-url", quoteJson(unloadDir),
          "--connector.name", "json",
          "-m", CUSTOMER_ORDER_MAPPINGS);
  status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateResultSetSize(14, SELECT_ALL_CUSTOMER_ORDERS);
  TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
  assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
  results =
      session.execute(
          ScriptGraphStatement.newInstance("g.E().hasLabel('" + PLACES_EDGE_LABEL + "')"));
  assertThat(results).hasSize(14);
}
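Every step of this round trip repeats the same pair of statements: run DataStaxBulkLoader over an argument list, then assert the exit status. A small helper inside the test class could factor that out. The sketch below is a hypothetical refactoring: runStep is an invented name, while addCommonSettings, assertStatus, and STATUS_OK are the helpers already used in the test above.

// Hypothetical helper for the test class above; not part of dsbulk or the actual test code.
private ExitStatus runStep(List<String> args) {
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK); // fail fast if this step did not complete cleanly
  return status;
}

// Example usage for the first load step:
// runStep(Lists.newArrayList(
//     "load", "-g", FRAUD_GRAPH, "-e", PLACES_EDGE_LABEL,
//     "-from", CUSTOMER_VERTEX_LABEL, "-to", ORDER_VERTEX_LABEL,
//     "-url", StringUtils.quoteJson(JSON_CUSTOMER_ORDER_RECORDS),
//     "-m", CUSTOMER_ORDER_MAPPINGS, "--connector.name", "json"));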
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by DataStax.
From the class GraphEndToEndCCMIT, method csv_full_load_unload_and_load_again_vertices.
@Test
void csv_full_load_unload_and_load_again_vertices() throws Exception {
  List<String> args =
      Lists.newArrayList(
          "load",
          "-g", FRAUD_GRAPH,
          "-v", CUSTOMER_VERTEX_LABEL,
          "-url", StringUtils.quoteJson(CSV_CUSTOMER_RECORDS),
          "--connector.csv.delimiter", "|");
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateResultSetSize(34, SELECT_ALL_CUSTOMERS);
  GraphResultSet results =
      session.execute(
          ScriptGraphStatement.newInstance("g.V().hasLabel('" + CUSTOMER_VERTEX_LABEL + "')"));
  assertThat(results).hasSize(34);
  TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
  assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
  logs.clear();
  stderr.clear();
  FileUtils.deleteDirectory(logDir);
  args =
      Lists.newArrayList(
          "unload",
          "-g", FRAUD_GRAPH,
          "-v", CUSTOMER_VERTEX_LABEL,
          "-url", quoteJson(unloadDir),
          "--connector.csv.delimiter", "|",
          "--connector.csv.maxConcurrentFiles", "1");
  status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateOutputFiles(35, unloadDir);
  TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
  assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
  logs.clear();
  stderr.clear();
  // Remove data for reload validation
  truncateTables();
  args =
      Lists.newArrayList(
          "load",
          "-g", FRAUD_GRAPH,
          "-v", CUSTOMER_VERTEX_LABEL,
          "-url", quoteJson(unloadDir),
          "--connector.csv.delimiter", "|");
  status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateResultSetSize(34, SELECT_ALL_CUSTOMERS);
  TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
  assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
  results =
      session.execute(
          ScriptGraphStatement.newInstance("g.V().hasLabel('" + CUSTOMER_VERTEX_LABEL + "')"));
  assertThat(results).hasSize(34);
}
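Note the asymmetry between validateResultSetSize(34, SELECT_ALL_CUSTOMERS) and validateOutputFiles(35, unloadDir): with --connector.csv.maxConcurrentFiles set to 1 and the CSV connector writing a header row by default, the single unloaded file presumably contains 34 data rows plus one header line. The sketch below shows one rough way to check that count by hand; the directory path is a placeholder, and the line-counting logic is an assumption about what the validateOutputFiles helper verifies, not its actual implementation.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class CountUnloadedLines {

  public static void main(String[] args) throws IOException {
    // Placeholder: point this at the unload directory used by the test run.
    Path unloadDir = Paths.get("/tmp/unload");
    long totalLines = 0;
    try (Stream<Path> files = Files.list(unloadDir)) {
      for (Path file : files.filter(Files::isRegularFile).collect(Collectors.toList())) {
        try (Stream<String> lines = Files.lines(file)) {
          totalLines += lines.count();
        }
      }
    }
    // For 34 customer vertices unloaded to a single CSV file with a header row,
    // this is expected to print 35.
    System.out.println("Total lines under " + unloadDir + ": " + totalLines);
  }
}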
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by DataStax.
From the class GraphEndToEndCCMIT, method json_full_load_unload_and_load_again_vertices.
@Test
void json_full_load_unload_and_load_again_vertices() throws Exception {
  // Load customer JSON file.
  List<String> args =
      Lists.newArrayList(
          "load",
          "-g", FRAUD_GRAPH,
          "-v", CUSTOMER_VERTEX_LABEL,
          "-url", StringUtils.quoteJson(JSON_CUSTOMER_RECORDS),
          "--connector.name", "json");
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateResultSetSize(34, SELECT_ALL_CUSTOMERS);
  TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
  assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
  logs.clear();
  stderr.clear();
  GraphResultSet results =
      session.execute(
          ScriptGraphStatement.newInstance("g.V().hasLabel('" + CUSTOMER_VERTEX_LABEL + "')"));
  assertThat(results).hasSize(34);
  FileUtils.deleteDirectory(logDir);
  // Unload customer JSON file
  args =
      Lists.newArrayList(
          "unload",
          "-g", FRAUD_GRAPH,
          "-v", CUSTOMER_VERTEX_LABEL,
          "-url", quoteJson(unloadDir),
          "--connector.name", "json",
          "--connector.json.maxConcurrentFiles", "1");
  status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateOutputFiles(34, unloadDir);
  TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
  assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
  logs.clear();
  stderr.clear();
  // Remove data for reload validation
  truncateTables();
  // Reload customer data
  args =
      Lists.newArrayList(
          "load",
          "-g", FRAUD_GRAPH,
          "-v", CUSTOMER_VERTEX_LABEL,
          "-url", StringUtils.quoteJson(JSON_CUSTOMER_RECORDS),
          "--connector.name", "json");
  status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateResultSetSize(34, SELECT_ALL_CUSTOMERS);
  TestAssertions.assertThat(logs).hasMessageContaining("completed successfully");
  assertThat(stderr.getStreamAsStringPlain()).contains("completed successfully");
  results =
      session.execute(
          ScriptGraphStatement.newInstance("g.V().hasLabel('" + CUSTOMER_VERTEX_LABEL + "')"));
  assertThat(results).hasSize(34);
}
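The verification step submits a Gremlin traversal through the driver's ScriptGraphStatement API and checks the size of the returned GraphResultSet against the expected vertex count. A minimal standalone sketch of the same kind of check follows; the contact point, datacenter, graph name, and vertex label are placeholder assumptions.

import com.datastax.dse.driver.api.core.graph.GraphNode;
import com.datastax.dse.driver.api.core.graph.GraphResultSet;
import com.datastax.dse.driver.api.core.graph.ScriptGraphStatement;
import com.datastax.oss.driver.api.core.CqlSession;
import java.net.InetSocketAddress;

public class VertexCountCheck {

  public static void main(String[] args) {
    // Placeholder connection settings for a DSE cluster with Graph enabled.
    try (CqlSession session =
        CqlSession.builder()
            .addContactPoint(new InetSocketAddress("127.0.0.1", 9042))
            .withLocalDatacenter("dc1")
            .build()) {
      // Count vertices with a given label, in the same style as the assertions above.
      GraphResultSet results =
          session.execute(
              ScriptGraphStatement.newInstance("g.V().hasLabel('customer')")
                  .setGraphName("Fraud")); // graph name and label are assumptions
      int count = 0;
      for (GraphNode ignored : results) {
        count++;
      }
      System.out.println("Vertices with label 'customer': " + count);
    }
  }
}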
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by DataStax.
From the class JsonConnectorEndToEndCCMIT, method should_not_truncate_nor_round.
/** Test for DAT-224. */
@Test
void should_not_truncate_nor_round() throws Exception {
  session.execute("DROP TABLE IF EXISTS numbers");
  session.execute(
      "CREATE TABLE IF NOT EXISTS numbers (key varchar PRIMARY KEY, vdouble double, vdecimal decimal)");
  List<String> args = new ArrayList<>();
  args.add("load");
  args.add("--connector.name");
  args.add("json");
  args.add("--connector.json.url");
  args.add(StringUtils.quoteJson(ClassLoader.getSystemResource("number.json").toExternalForm()));
  args.add("--connector.json.mode");
  args.add("SINGLE_DOCUMENT");
  args.add("--codec.overflowStrategy");
  args.add("REJECT");
  args.add("--schema.keyspace");
  args.add(session.getKeyspace().map(CqlIdentifier::asInternal).orElseThrow(IllegalStateException::new));
  args.add("--schema.table");
  args.add("numbers");
  args.add("--schema.mapping");
  args.add("*=*");
  ExitStatus loadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(loadStatus, STATUS_COMPLETED_WITH_ERRORS);
  validateExceptionsLog(
      1,
      "ArithmeticException: Cannot convert 0.12345678901234567890123456789 from BigDecimal to Double",
      "mapping-errors.log");
  checkNumbersWritten(OverflowStrategy.REJECT, UNNECESSARY, session);
  FileUtils.deleteDirectory(logDir);
  args = new ArrayList<>();
  args.add("unload");
  args.add("--connector.name");
  args.add("json");
  args.add("--connector.json.url");
  args.add(quoteJson(unloadDir));
  args.add("--connector.json.mode");
  args.add("MULTI_DOCUMENT");
  args.add("--connector.json.maxConcurrentFiles");
  args.add("1");
  args.add("--codec.roundingStrategy");
  args.add("UNNECESSARY");
  args.add("--schema.keyspace");
  args.add(session.getKeyspace().map(CqlIdentifier::asInternal).orElseThrow(IllegalStateException::new));
  args.add("--schema.query");
  args.add("SELECT key, vdouble, vdecimal FROM numbers");
  ExitStatus unloadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(unloadStatus, STATUS_OK);
  checkNumbersRead(OverflowStrategy.REJECT, unloadDir);
  FileUtils.deleteDirectory(logDir);
  // check we can load from the unloaded dataset
  args = new ArrayList<>();
  args.add("load");
  args.add("--connector.name");
  args.add("json");
  args.add("--connector.json.url");
  args.add(quoteJson(unloadDir));
  args.add("--codec.overflowStrategy");
  args.add("REJECT");
  args.add("--schema.keyspace");
  args.add(session.getKeyspace().map(CqlIdentifier::asInternal).orElseThrow(IllegalStateException::new));
  args.add("--schema.table");
  args.add("numbers");
  args.add("--schema.mapping");
  args.add("*=*");
  loadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(loadStatus, STATUS_OK);
  checkNumbersWritten(OverflowStrategy.REJECT, UNNECESSARY, session);
}
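The load steps use --codec.overflowStrategy REJECT, so a decimal such as 0.12345678901234567890123456789, which has no exact double representation, is rejected rather than silently rounded; that is what the mapping-errors.log assertion captures. The unload likewise sets --codec.roundingStrategy UNNECESSARY to forbid lossy rounding on the way out. The snippet below illustrates the underlying precision loss with plain JDK arithmetic; it is only an illustration, not dsbulk's actual conversion code.

import java.math.BigDecimal;
import java.math.RoundingMode;

public class PrecisionLossDemo {

  public static void main(String[] args) {
    BigDecimal original = new BigDecimal("0.12345678901234567890123456789");
    // Narrowing to double keeps only about 15 to 17 significant decimal digits.
    double narrowed = original.doubleValue();
    BigDecimal roundTripped = new BigDecimal(narrowed);
    System.out.println("original      = " + original.toPlainString());
    System.out.println("as double     = " + narrowed);
    System.out.println("round-tripped = " + roundTripped.toPlainString());
    // The round trip does not compare equal, so an exact conversion to double is impossible;
    // a strategy that refuses lossy conversions must therefore reject the value.
    System.out.println("exact? " + (original.compareTo(roundTripped) == 0));

    // For comparison, RoundingMode.UNNECESSARY throws ArithmeticException whenever
    // rounding would actually change the value.
    try {
      original.setScale(15, RoundingMode.UNNECESSARY);
    } catch (ArithmeticException e) {
      System.out.println("setScale(15, UNNECESSARY) -> " + e.getMessage());
    }
  }
}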