Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by DataStax.
From the class JsonConnectorEndToEndCCMIT, method full_load_unload_large_batches:
/**
* Attempts to load and then unload a larger dataset that can be batched.
*/
@Test
void full_load_unload_large_batches() throws Exception {
List<String> args = new ArrayList<>();
args.add("load");
args.add("--connector.name");
args.add("json");
args.add("--connector.json.url");
args.add(StringUtils.quoteJson(JsonUtils.JSON_RECORDS));
args.add("--schema.keyspace");
args.add(session.getKeyspace().map(CqlIdentifier::asInternal).orElseThrow(IllegalStateException::new));
args.add("--schema.table");
args.add("ip_by_country");
args.add("--schema.mapping");
args.add(IP_BY_COUNTRY_MAPPING_NAMED);
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
validateResultSetSize(500, "SELECT * FROM ip_by_country");
FileUtils.deleteDirectory(logDir);
args = new ArrayList<>();
args.add("unload");
args.add("--connector.name");
args.add("json");
args.add("--connector.json.url");
args.add(quoteJson(unloadDir));
args.add("--connector.json.maxConcurrentFiles");
args.add("1");
args.add("--schema.keyspace");
args.add(session.getKeyspace().map(CqlIdentifier::asInternal).orElseThrow(IllegalStateException::new));
args.add("--schema.table");
args.add("ip_by_country");
args.add("--schema.mapping");
args.add(IP_BY_COUNTRY_MAPPING_NAMED);
status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
validateOutputFiles(500, unloadDir);
}
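For readers who want to run DataStaxBulkLoader outside this test harness: the addCommonSettings(...) helper used above is part of dsbulk's test utilities and injects environment-specific options. Below is a minimal standalone sketch of the same load invocation. The input path, keyspace name, contact point, and log directory are placeholders, and the -h host shortcut is assumed from DSBulk's documented options; verify the flags against your DSBulk version.

// A minimal standalone sketch, not taken from the test suite.
List<String> loadArgs = new ArrayList<>();
loadArgs.add("load");
loadArgs.add("--connector.name");
loadArgs.add("json");
loadArgs.add("--connector.json.url");
loadArgs.add("/path/to/records.json"); // hypothetical input location
loadArgs.add("--schema.keyspace");
loadArgs.add("my_keyspace"); // hypothetical keyspace
loadArgs.add("--schema.table");
loadArgs.add("ip_by_country");
loadArgs.add("-h"); // host shortcut, assumed from DSBulk's documented options
loadArgs.add("127.0.0.1"); // hypothetical contact point
loadArgs.add("--log.directory");
loadArgs.add("/tmp/dsbulk-logs"); // hypothetical log directory
// The runner consumes the argument vector directly and reports an ExitStatus:
ExitStatus status = new DataStaxBulkLoader(loadArgs.toArray(new String[0])).run();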
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by DataStax.
From the class JsonConnectorEndToEndCCMIT, method error_load_missing_primary_keys_case_sensitive:
/**
* DAT-307: Validates that records with missing primary key values fail to load when the
* table uses case-sensitive identifiers.
*/
@Test
void error_load_missing_primary_keys_case_sensitive() throws Exception {
session.execute("DROP TABLE IF EXISTS missing");
session.execute("CREATE TABLE missing (\"PK\" varchar, \"CC\" varchar, \"V\" varchar, " + "PRIMARY KEY(\"PK\", \"CC\"))");
List<String> args = new ArrayList<>();
args.add("load");
args.add("--connector.name");
args.add("json");
args.add("--connector.json.url");
args.add(StringUtils.quoteJson(ClassLoader.getSystemResource("missing-case.json")));
args.add("--schema.keyspace");
args.add(session.getKeyspace().map(CqlIdentifier::asInternal).orElseThrow(IllegalStateException::new));
args.add("--schema.table");
args.add("missing");
args.add("--connector.json.mode");
args.add("SINGLE_DOCUMENT");
args.add("--schema.allowMissingFields");
args.add("true");
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertThat(status).isEqualTo(STATUS_COMPLETED_WITH_ERRORS);
validateNumberOfBadRecords(4);
validateExceptionsLog(1, "Primary key column \"PK\" cannot be set to null", "mapping-errors.log");
validateExceptionsLog(1, "Primary key column \"CC\" cannot be set to null", "mapping-errors.log");
validateExceptionsLog(1, "Primary key column \"PK\" cannot be left unset", "mapping-errors.log");
validateExceptionsLog(1, "Primary key column \"CC\" cannot be left unset", "mapping-errors.log");
validateResultSetSize(0, "SELECT * FROM missing");
}
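The assertions above imply four rejected records: one with "PK" explicitly null, one with "CC" explicitly null, one with "PK" absent, and one with "CC" absent. The actual contents of missing-case.json are not reproduced here; a hypothetical SINGLE_DOCUMENT input consistent with those four assertions would look like:

[
  { "PK": null, "CC": "1", "V": "a" },
  { "PK": "1", "CC": null, "V": "b" },
  { "CC": "2", "V": "c" },
  { "PK": "2", "V": "d" }
]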
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by DataStax.
From the class CSVConnectorEndToEndCCMIT, method unload_token_range_restriction_positional:
@Test
void unload_token_range_restriction_positional() throws IOException {
session.execute("DROP TABLE IF EXISTS unload_token_range");
session.execute("CREATE TABLE unload_token_range (pk int, cc int, v int, PRIMARY KEY (pk, cc))");
session.execute("INSERT INTO unload_token_range (pk, cc, v) values (0, 1, 2)");
List<String> args = Lists.newArrayList("unload", "--log.directory", quoteJson(logDir), "-header", "false", "--connector.csv.url", quoteJson(unloadDir), "--connector.csv.maxConcurrentFiles", "1", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.query", quoteJson("SELECT pk, cc, v FROM unload_token_range " + "WHERE token(pk) > ? AND token(pk) <= ?"));
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
List<String> lines = FileUtils.readAllLinesInDirectoryAsStream(unloadDir).collect(Collectors.toList());
assertThat(lines).hasSize(1).containsExactly("0,1,2");
}
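DSBulk recognizes the positional ? placeholders in the WHERE token(pk) > ? AND token(pk) <= ? clause as token range boundaries and binds them for each range it reads, so the unload still covers the whole ring. DSBulk's documentation also describes a named-variable form of the same restriction; a sketch of that variant of the query argument, to be verified against your version:

// Named-variable form of the same token range restriction (a sketch based on
// DSBulk's documented :start/:end variables):
String namedQuery =
    "SELECT pk, cc, v FROM unload_token_range WHERE token(pk) > :start AND token(pk) <= :end";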
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by DataStax.
From the class CSVConnectorEndToEndCCMIT, method load_unload_nested_function:
@Test
void load_unload_nested_function() throws IOException {
session.execute("DROP TABLE IF EXISTS test_nested_function");
session.execute("CREATE TABLE test_nested_function (pk int PRIMARY KEY, v int, t timestamp)");
MockConnector.mockReads(RecordUtils.mappedCSV("pk", "1", "v", "1"));
List<String> args = new ArrayList<>();
args.add("load");
args.add("--connector.name");
args.add("mock");
args.add("--schema.keyspace");
args.add(session.getKeyspace().get().asInternal());
args.add("--schema.table");
args.add("test_nested_function");
args.add("--schema.mapping");
args.add("*=*,dateof(now())=t");
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
List<Row> rows = session.execute("SELECT t FROM test_nested_function WHERE pk = 1").all();
assertThat(rows).hasSize(1);
Instant yesterday = Instant.now().minus(1, ChronoUnit.DAYS);
assertThat(rows.get(0).getInstant(0)).isNotNull().isAfterOrEqualTo(yesterday);
args = new ArrayList<>();
args.add("unload");
args.add("--connector.csv.url");
args.add(quoteJson(unloadDir));
args.add("--schema.keyspace");
args.add(session.getKeyspace().get().asInternal());
args.add("--schema.table");
args.add("test_nested_function");
args.add("--schema.mapping");
args.add("pk,dateof(now())");
status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
assertThat(
        FileUtils.readAllLinesInDirectoryAsStreamExcludingHeaders(unloadDir)
            .collect(Collectors.toList()))
    .singleElement()
    .asString()
    .matches("1,\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d{1,3})?Z");
}
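In the load mapping "*=*,dateof(now())=t", the *=* entry maps every input field to the column of the same name, and dateof(now())=t additionally writes a server-side computed timestamp to column t; on the unload side, "pk,dateof(now())" selects the pk column plus the function result. For illustration only, a sketch of the kind of statements such mappings translate to; this is not the exact CQL that dsbulk generates:

// Load side: the function mapping entry ends up as a term in the INSERT.
String illustrativeInsert =
    "INSERT INTO test_nested_function (pk, v, t) VALUES (:pk, :v, dateof(now()))";
// Unload side: the function appears in the generated SELECT list.
String illustrativeSelect = "SELECT pk, dateof(now()) FROM test_nested_function";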
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by DataStax.
From the class CSVConnectorEndToEndCCMIT, method load_ttl_timestamp_now_in_mapping_and_unload_deprecated_mapping_tokens:
@Test
void load_ttl_timestamp_now_in_mapping_and_unload_deprecated_mapping_tokens() throws IOException {
session.execute("DROP TABLE IF EXISTS table_ttl_timestamp");
session.execute("CREATE TABLE table_ttl_timestamp (key int PRIMARY KEY, value text, loaded_at timeuuid)");
List<String> args = Lists.newArrayList("load", "--log.directory", quoteJson(logDir), "--connector.csv.ignoreLeadingWhitespaces", "true", "--connector.csv.ignoreTrailingWhitespaces", "true", "--connector.csv.url", ClassLoader.getSystemResource("ttl-timestamp.csv").toExternalForm(), "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.table", "table_ttl_timestamp", "--schema.mapping", "*:*,now()=loaded_at,created_at=__timestamp,time_to_live=__ttl");
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
assertTTLAndTimestamp();
assertThat(logs)
    .hasMessageContaining("The special __ttl mapping token has been deprecated")
    .hasMessageContaining("The special __timestamp mapping token has been deprecated");
logs.clear();
FileUtils.deleteDirectory(logDir);
args = Lists.newArrayList("unload", "--connector.csv.url", quoteJson(unloadDir), "--log.directory", quoteJson(logDir), "--connector.csv.ignoreLeadingWhitespaces", "true", "--connector.csv.ignoreTrailingWhitespaces", "true", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.table", "table_ttl_timestamp", "--schema.mapping", "*:*,created_at=writetime(value),time_to_live=ttl(value)", "--connector.csv.maxConcurrentFiles", "1");
status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
validateOutputFiles(2, unloadDir);
}
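The deprecation warnings asserted after the load point to replacement syntax for the special tokens. A sketch of the load mapping rewritten without __timestamp and __ttl, assuming the writetime(*) and ttl(*) forms suggested by the warnings are accepted by the DSBulk version in use:

// Hypothetical modernized load mapping; verify the exact replacement syntax
// against your DSBulk version's deprecation messages and documentation:
String modernMapping = "*:*, now() = loaded_at, created_at = writetime(*), time_to_live = ttl(*)";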