Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method unload_and_load_timestamp_ttl_case_sensitive_custom_query_aliased.
@Test
void unload_and_load_timestamp_ttl_case_sensitive_custom_query_aliased() throws IOException {
session.execute("DROP TABLE IF EXISTS \"UNLOAD_AND_LOAD_TIMESTAMP_TTL\"");
session.execute("CREATE TABLE \"UNLOAD_AND_LOAD_TIMESTAMP_TTL\" (key int PRIMARY KEY, \"My Value\" text)");
session.execute("INSERT INTO \"UNLOAD_AND_LOAD_TIMESTAMP_TTL\" (key, \"My Value\") VALUES (1, 'foo') " + "USING TIMESTAMP 123456789 AND TTL 123456789");
List<String> args =
    Lists.newArrayList(
        "unload",
        "--log.directory", quoteJson(logDir),
        "--connector.csv.url", quoteJson(unloadDir),
        "--connector.csv.header", "true",
        "--schema.keyspace", session.getKeyspace().get().asInternal(),
        "--schema.query",
        quoteJson(
            "SELECT key, \"My Value\", "
                + "writetime(\"My Value\") AS \"MyWritetime\", "
                + "ttl(\"My Value\") AS \"MyTtl\" "
                + "FROM \"UNLOAD_AND_LOAD_TIMESTAMP_TTL\""));
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
Stream<String> line = FileUtils.readAllLinesInDirectoryAsStreamExcludingHeaders(unloadDir);
assertThat(line)
    .singleElement(InstanceOfAssertFactories.STRING)
    .contains("1,foo,")
    .contains(CodecUtils.numberToInstant(123456789, MICROSECONDS, EPOCH).toString())
    .containsPattern(",\\d+");
FileUtils.deleteDirectory(logDir);
session.execute("TRUNCATE \"UNLOAD_AND_LOAD_TIMESTAMP_TTL\"");
args =
    Lists.newArrayList(
        "load",
        "--log.directory", quoteJson(logDir),
        "--connector.csv.url", quoteJson(unloadDir),
        "--connector.csv.header", "true",
        "--schema.keyspace", session.getKeyspace().get().asInternal(),
        "--schema.query",
        quoteJson(
            "INSERT INTO \"UNLOAD_AND_LOAD_TIMESTAMP_TTL\" (key, \"My Value\") "
                + "VALUES (:key, :\"My Value\") "
                + "USING TIMESTAMP :\"MyWritetime\" "
                + "AND TTL :\"MyTtl\""));
status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
ResultSet rs =
    session.execute(
        "SELECT key, \"My Value\", "
            + "writetime(\"My Value\") AS timestamp, "
            + "ttl(\"My Value\") AS ttl "
            + "FROM \"UNLOAD_AND_LOAD_TIMESTAMP_TTL\" WHERE key = 1");
Row row = rs.one();
assertThat(row.getLong("timestamp")).isEqualTo(123456789L);
assertThat(row.getInt("ttl")).isLessThanOrEqualTo(123456789);
}
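The assertion on the unloaded line relies on CQL writetimes being microseconds since the Unix epoch. As a minimal stand-alone sketch using plain java.time (not dsbulk's CodecUtils, and assuming numberToInstant(123456789, MICROSECONDS, EPOCH) performs this same conversion), the expected instant can be reproduced like this:

import java.time.Instant;
import java.time.temporal.ChronoUnit;

public class WritetimeSketch {
  public static void main(String[] args) {
    // 123456789 microseconds after the epoch, i.e. the USING TIMESTAMP value inserted above.
    Instant writetime = Instant.EPOCH.plus(123_456_789L, ChronoUnit.MICROS);
    System.out.println(writetime); // prints 1970-01-01T00:02:03.456789Z
  }
}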
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method unload_load_preserving_ttl_and_timestamp.
@ParameterizedTest
@MethodSource
void unload_load_preserving_ttl_and_timestamp(DataType cqlType, Object value1, Object value2, String csv1, String csv2) throws IOException {
checkCqlTypeSupported(cqlType);
session.execute("DROP TABLE IF EXISTS preserve_ttl_timestamp");
session.execute(String.format("CREATE TABLE preserve_ttl_timestamp (pk1 int, pk2 int, cc1 int, cc2 int, v1 %s, v2 %s, PRIMARY KEY ((pk1, pk2), cc1, cc2))", cqlType.asCql(true, true), cqlType.asCql(true, true)));
TypeCodec<Object> codec = CodecRegistry.DEFAULT.codecFor(cqlType);
session.execute(String.format("BEGIN BATCH " + "INSERT INTO preserve_ttl_timestamp (pk1, pk2, cc1, cc2, v1) " + "VALUES (1, 2, 3, 4, %s) " + "USING TIMESTAMP 1111 AND TTL 111111; " + "INSERT INTO preserve_ttl_timestamp (pk1, pk2, cc1, cc2, v2) " + "VALUES (1, 2, 3, 4, %s) " + "USING TIMESTAMP 2222 AND TTL 222222; " + "APPLY BATCH", codec.format(value1), codec.format(value2)));
List<String> args =
    Lists.newArrayList(
        "unload",
        "--log.directory", quoteJson(logDir),
        "--connector.csv.url", quoteJson(unloadDir),
        "--connector.csv.header", "true",
        "--schema.keyspace", session.getKeyspace().get().asInternal(),
        "--schema.table", "preserve_ttl_timestamp",
        "-timestamp", "true",
        "-ttl", "true");
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
Stream<String> line = FileUtils.readAllLinesInDirectoryAsStreamExcludingHeaders(unloadDir);
assertThat(line)
    .singleElement(InstanceOfAssertFactories.STRING)
    .contains("1,2,3,4,", csv1, csv2)
    .containsPattern(",1970-01-01T00:00:00\\.001111Z,111\\d\\d\\d,")
    .containsPattern(",1970-01-01T00:00:00\\.002222Z,222\\d\\d\\d");
assertThat(logs).doesNotHaveMessageContaining("Skipping timestamp preservation").doesNotHaveMessageContaining("Skipping TTL preservation");
FileUtils.deleteDirectory(logDir);
logs.clear();
session.execute("TRUNCATE preserve_ttl_timestamp");
args =
    Lists.newArrayList(
        "load",
        "--log.directory", quoteJson(logDir),
        "--connector.csv.url", quoteJson(unloadDir),
        "--connector.csv.header", "true",
        "--schema.keyspace", session.getKeyspace().get().asInternal(),
        "--schema.table", "preserve_ttl_timestamp",
        "-timestamp", "true",
        "-ttl", "true");
status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
ResultSet rs =
    session.execute(
        "SELECT pk1, pk2, cc1, cc2, "
            + "v1, writetime(v1) AS v1w, ttl(v1) as v1t, "
            + "v2, writetime(v2) AS v2w, ttl(v2) as v2t "
            + "FROM preserve_ttl_timestamp "
            + "WHERE pk1 = 1 AND pk2 = 2 AND cc1 = 3 AND cc2 = 4");
Row row = rs.one();
assertThat(row).isNotNull();
assertThat(row.getInt("pk1")).isEqualTo(1);
assertThat(row.getInt("pk2")).isEqualTo(2);
assertThat(row.getInt("cc1")).isEqualTo(3);
assertThat(row.getInt("cc2")).isEqualTo(4);
assertThat(row.getObject("v1")).isEqualTo(value1);
assertThat(row.getObject("v2")).isEqualTo(value2);
assertThat(row.getLong("v1w")).isEqualTo(1111L);
assertThat(row.getLong("v2w")).isEqualTo(2222L);
assertThat(row.getInt("v1t")).isLessThanOrEqualTo(111111).isGreaterThan(111000);
assertThat(row.getInt("v2t")).isLessThanOrEqualTo(222222).isGreaterThan(222000);
assertThat(logs).doesNotHaveMessageContaining("Skipping timestamp preservation").doesNotHaveMessageContaining("Skipping TTL preservation");
}
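The @MethodSource factory that supplies (cqlType, value1, value2, csv1, csv2) is not included in this snippet. As a purely hypothetical sketch of what such a factory could look like (when @MethodSource has no value, JUnit 5 resolves a static factory method with the same name as the test; the values below are illustrative, not the ones actually used in CSVConnectorEndToEndCCMIT):

// Assumed imports: java.util.stream.Stream, org.junit.jupiter.params.provider.Arguments,
// com.datastax.oss.driver.api.core.type.DataTypes.
static Stream<Arguments> unload_load_preserving_ttl_and_timestamp() {
  return Stream.of(
      // cqlType, value1, value2, csv1, csv2 (illustrative values only)
      Arguments.of(DataTypes.INT, 1, 2, "1", "2"),
      Arguments.of(DataTypes.TEXT, "foo", "bar", "foo", "bar"));
}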
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class JsonEndToEndSimulacronIT, method full_unload_custom_features.
@Test
void full_unload_custom_features() throws Exception {
List<LinkedHashMap<String, Object>> rows = new ArrayList<>();
LinkedHashMap<String, Object> row = new LinkedHashMap<>();
row.put("pk", 1);
row.put("c1", null);
rows.add(row);
SimulacronUtils.primeTables(
    simulacron,
    new SimulacronUtils.Keyspace(
        "ks1",
        new Table(
            "table1",
            singletonList(new Column("pk", INT)),
            emptyList(),
            singletonList(new Column("c1", TEXT)),
            rows)));
String[] args = { "unload", "-c", "json", "--connector.json.url", StringUtils.quoteJson(unloadDir), "--connector.json.maxConcurrentFiles", "1", "--schema.query", "SELECT pk, c1 FROM ks1.table1", "--connector.json.generatorFeatures", "{QUOTE_FIELD_NAMES = false}", "--connector.json.serializationStrategy", "NON_NULL" };
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
validateQueryCount(simulacron, 1, "SELECT pk, c1 FROM ks1.table1 WHERE token(pk) > :start AND token(pk) <= :end", LOCAL_ONE);
validateOutputFiles(1, unloadDir);
Optional<String> line = FileUtils.readAllLinesInDirectoryAsStream(unloadDir).findFirst();
assertThat(line).isPresent().hasValue("{pk:1}");
}
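The generatorFeatures and serializationStrategy options passed above correspond to Jackson settings, which is why the unloaded line is expected to be {pk:1} rather than {"pk":1,"c1":null}. As a rough stand-alone Jackson illustration (not dsbulk code; the RowBean class is made up for this example):

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.ObjectMapper;

public class UnquotedNonNullSketch {

  static class RowBean {
    public int pk = 1;
    public String c1 = null; // plays the role of the null c1 column in the primed row
  }

  public static void main(String[] args) throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    // Do not quote field names, and omit null values, mirroring the two options above.
    mapper.configure(JsonGenerator.Feature.QUOTE_FIELD_NAMES, false);
    mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
    System.out.println(mapper.writeValueAsString(new RowBean())); // prints {pk:1}
  }
}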
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class JsonEndToEndSimulacronIT, method load_errors.
@Test
void load_errors() throws Exception {
primeIpByCountryTable(simulacron);
Map<String, Object> params = new HashMap<>();
params.put("country_name", "Sweden");
RequestPrime prime1 = createParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY, params, new SuccessResult(emptyList(), new LinkedHashMap<>()));
simulacron.prime(new Prime(prime1));
// recoverable errors only
params.put("country_name", "France");
prime1 = createParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY, params, new ReadTimeoutResult(LOCAL_ONE, 1, 0, false));
simulacron.prime(new Prime(prime1));
params.put("country_name", "Gregistan");
prime1 = createParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY, params, new WriteTimeoutResult(LOCAL_ONE, 0, 0, WriteType.BATCH_LOG));
simulacron.prime(new Prime(prime1));
params.put("country_name", "Andybaijan");
prime1 = createParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY, params, new WriteFailureResult(ONE, 0, 0, emptyMap(), WriteType.BATCH));
simulacron.prime(new Prime(prime1));
params = new HashMap<>();
params.put("country_name", "United States");
prime1 = createParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY, params, new FunctionFailureResult("keyspace", "function", emptyList(), "bad function call"));
simulacron.prime(new Prime(prime1));
String[] args = { "load", "-c", "json", "--connector.json.url", StringUtils.quoteJson(JsonUtils.JSON_RECORDS_ERROR), "--driver.advanced.retry-policy.max-retries", "1", "--schema.keyspace", "ks1", "--schema.query", INSERT_INTO_IP_BY_COUNTRY, "--schema.mapping", IP_BY_COUNTRY_MAPPING_NAMED };
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_COMPLETED_WITH_ERRORS);
// There are 24 rows of data, but two extra queries are expected due to the retries
// for the write timeout and the unavailable error.
validateQueryCount(simulacron, 26, "INSERT INTO ip_by_country", LOCAL_ONE);
validateNumberOfBadRecords(4);
validateExceptionsLog(4, "Source:", "load-errors.log");
}
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class JsonEndToEndSimulacronIT, method unload_failure_during_read_multi_thread.
@Test
void unload_failure_during_read_multi_thread() {
primeIpByCountryTable(simulacron);
RequestPrime prime = createQueryWithError(SELECT_FROM_IP_BY_COUNTRY, new SyntaxErrorResult("Invalid table", 0L, false));
simulacron.prime(new Prime(prime));
String[] args = { "unload", "-c", "json", "--connector.json.url", StringUtils.quoteJson(unloadDir), "--connector.json.maxConcurrentFiles", "4", "--schema.keyspace", "ks1", "--schema.query", SELECT_FROM_IP_BY_COUNTRY, "--schema.mapping", IP_BY_COUNTRY_MAPPING_NAMED };
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_ABORTED_FATAL_ERROR);
validateQueryCount(simulacron, 0, SELECT_FROM_IP_BY_COUNTRY, LOCAL_ONE);
validatePrepare(simulacron, SELECT_FROM_IP_BY_COUNTRY);
}