Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method unload_and_load_timestamp_ttl:
@Test
void unload_and_load_timestamp_ttl() throws IOException {
session.execute("DROP TABLE IF EXISTS unload_and_load_timestamp_ttl");
session.execute("CREATE TABLE unload_and_load_timestamp_ttl (key int PRIMARY KEY, value text)");
session.execute("INSERT INTO unload_and_load_timestamp_ttl (key, value) VALUES (1, 'foo') " + "USING TIMESTAMP 123456789 AND TTL 123456789");
List<String> args = Lists.newArrayList(
    "unload",
    "--log.directory", quoteJson(logDir),
    "--connector.csv.url", quoteJson(unloadDir),
    "--connector.csv.header", "true",
    "--schema.keyspace", session.getKeyspace().get().asInternal(),
    "--schema.query",
    "SELECT key, value, writetime(value) AS timestamp, ttl(value) AS ttl "
        + "FROM unload_and_load_timestamp_ttl");
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
Stream<String> line = FileUtils.readAllLinesInDirectoryAsStreamExcludingHeaders(unloadDir);
assertThat(line)
    .singleElement(InstanceOfAssertFactories.STRING)
    .contains("1,foo,")
    .contains(CodecUtils.numberToInstant(123456789, MICROSECONDS, EPOCH).toString())
    .containsPattern(",\\d+");
FileUtils.deleteDirectory(logDir);
session.execute("TRUNCATE unload_and_load_timestamp_ttl");
args = Lists.newArrayList(
    "load",
    "--log.directory", quoteJson(logDir),
    "--connector.csv.url", quoteJson(unloadDir),
    "--connector.csv.header", "true",
    "--schema.keyspace", session.getKeyspace().get().asInternal(),
    "--schema.table", "unload_and_load_timestamp_ttl",
    "--schema.mapping", "* = * , timestamp = writetime(*), ttl = ttl(*)");
status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
ResultSet rs = session.execute(
    "SELECT key, value, writetime(value) AS timestamp, ttl(value) AS ttl "
        + "FROM unload_and_load_timestamp_ttl WHERE key = 1");
Row row = rs.one();
assertThat(row.getLong("timestamp")).isEqualTo(123456789L);
assertThat(row.getInt("ttl")).isLessThanOrEqualTo(123456789);
}
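The contains(...) assertion above expects the unloaded CSV to carry the write time as an ISO-8601 instant rather than as the raw microsecond count, which is what CodecUtils.numberToInstant produces in the test. The same conversion can be spelled out with plain java.time (a minimal standalone sketch; the class name is hypothetical):

import java.time.Instant;
import java.time.temporal.ChronoUnit;

public class WritetimeAsInstant {
  public static void main(String[] args) {
    // writetime(value) is expressed in microseconds since the Unix epoch, so
    // USING TIMESTAMP 123456789 means 123.456789 seconds after 1970-01-01T00:00:00Z.
    Instant writetime = Instant.EPOCH.plus(123_456_789, ChronoUnit.MICROS);
    System.out.println(writetime); // 1970-01-01T00:02:03.456789Z
  }
}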
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method function_mapped_to_primary_key:
/**
* Test for DAT-326.
*/
@Test
void function_mapped_to_primary_key() {
session.execute("DROP TABLE IF EXISTS dat326a");
session.execute("CREATE TABLE IF NOT EXISTS dat326a (pk int, cc timeuuid, v int, PRIMARY KEY (pk, cc))");
List<String> args = Lists.newArrayList(
    "load",
    "--log.directory", quoteJson(logDir),
    "-header", "true",
    "--connector.csv.url", quoteJson(getClass().getResource("/function-pk.csv")),
    "--schema.keyspace", session.getKeyspace().get().asInternal(),
    "--schema.table", "dat326a",
    "--schema.mapping", "now()=cc,*=*");
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
}
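The mapping now()=cc,*=* is the point of DAT-326: the timeuuid clustering column cc is filled server-side by the CQL now() function, so function-pk.csv does not need to provide a value for cc. A hand-written statement with the same effect would look roughly like the sketch below (a hypothetical illustration using the Java driver directly, not the query dsbulk actually generates):

import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.cql.SimpleStatement;

class FunctionMappedPkSketch {
  // Hypothetical illustration: insert one row with a server-generated timeuuid
  // clustering column, the same effect the "now()=cc,*=*" mapping aims for.
  static void insertWithServerSideTimeuuid(CqlSession session, int pk, int v) {
    session.execute(
        SimpleStatement.newInstance(
            "INSERT INTO dat326a (pk, cc, v) VALUES (?, now(), ?)", pk, v));
  }
}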
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method load_rebatch:
@Test
void load_rebatch() {
session.execute("DROP TABLE IF EXISTS rebatch");
session.execute("CREATE TABLE rebatch (pk int, cc int, v1 text, v2 text, PRIMARY KEY (pk, cc))");
List<String> args = Lists.newArrayList(
    "load",
    "--log.directory", quoteJson(logDir),
    "--connector.csv.url", quoteJson(ClassLoader.getSystemResource("rebatch.csv").toExternalForm()),
    "--connector.csv.header", "true",
    "--connector.csv.ignoreLeadingWhitespaces", "true",
    "--connector.csv.ignoreTrailingWhitespaces", "true",
    "--schema.keyspace", session.getKeyspace().get().asInternal(),
    "--schema.table", "rebatch",
    "--schema.mapping",
    "fk = pk, fc = cc, f1 = v1, f2 = v2, "
        + "timestamp1 = writetime(v1), timestamp2 = writetime(v2),"
        + "ttl1 = ttl(v1) , ttl2 = ttl(v2)");
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
ResultSet rs = session.execute(
    "SELECT pk, cc, "
        + "v1, writetime(v1) AS v1w, ttl(v1) as v1t, "
        + "v2, writetime(v2) AS v2w, ttl(v2) as v2t "
        + "FROM rebatch "
        + "WHERE pk = 1");
List<Row> rows = rs.all();
assertThat(rows).hasSize(20);
long v1w = ZonedDateTime.parse("2022-02-10T00:00:01-03:00").toInstant().toEpochMilli() * 1000;
long v2w = ZonedDateTime.parse("2022-03-10T00:00:01-03:00").toInstant().toEpochMilli() * 1000;
for (int i = 0; i < rows.size(); i++) {
Row row = rows.get(i);
String v1 = String.valueOf((char) ('a' + i));
String v2 = Strings.repeat(v1, 2);
assertThat(row).isNotNull();
assertThat(row.getInt("pk")).isEqualTo(1);
assertThat(row.getInt("cc")).isEqualTo(1 + i);
assertThat(row.getString("v1")).isEqualTo(v1);
assertThat(row.getString("v2")).isEqualTo(v2);
assertThat(row.getLong("v1w")).isEqualTo(v1w);
assertThat(row.getLong("v2w")).isEqualTo(v2w);
assertThat(row.getInt("v1t")).isLessThanOrEqualTo(1000);
assertThat(row.getInt("v2t")).isLessThanOrEqualTo(2000);
v1w += 1_000_000;
v2w += 1_000_000;
}
// We have 20 records, and the originally generated BATCH query contains 2 child statements,
// so after the batches are unwrapped we expect 40 INSERT statements to be executed in total.
// Since all records share the same partition key, we expect all of them to be rebatched together.
// With the default maximum batch size of 32, we therefore expect a first batch of 32 statements
// and a second one with the remaining 8.
assertThat(logs.getAllMessagesAsString()).contains("Batches: total: 2, size: 20.00 mean, 8 min, 32 max");
}
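The expected log line follows directly from the arithmetic laid out in the comment above; here is a minimal standalone sketch of that calculation (class name hypothetical, constants taken from the test and its comment):

public class RebatchArithmetic {
  public static void main(String[] args) {
    int records = 20;             // rows loaded from rebatch.csv
    int statementsPerRecord = 2;  // the generated BATCH has 2 child INSERTs per record
    int maxBatchSize = 32;        // default dsbulk maximum batch size, per the comment above

    int totalStatements = records * statementsPerRecord;                // 40
    int batches = (totalStatements + maxBatchSize - 1) / maxBatchSize;  // 2
    int minBatchSize = totalStatements - (batches - 1) * maxBatchSize;  // the smaller batch: 8
    double mean = (double) totalStatements / batches;                   // 20.0

    // Prints: Batches: total: 2, size: 20.00 mean, 8 min, 32 max
    System.out.printf("Batches: total: %d, size: %.2f mean, %d min, %d max%n",
        batches, mean, minBatchSize, maxBatchSize);
  }
}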
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method load_ttl_timestamp_now_in_query_and_mapping_positional_external_names:
@Test
void load_ttl_timestamp_now_in_query_and_mapping_positional_external_names() {
session.execute("DROP TABLE IF EXISTS table_ttl_timestamp");
session.execute("CREATE TABLE table_ttl_timestamp (key int PRIMARY KEY, value text, loaded_at timeuuid)");
List<String> args = Lists.newArrayList(
    "load",
    "--log.directory", quoteJson(logDir),
    "--connector.csv.ignoreLeadingWhitespaces", "true",
    "--connector.csv.ignoreTrailingWhitespaces", "true",
    "--connector.csv.url", ClassLoader.getSystemResource("ttl-timestamp.csv").toExternalForm(),
    "--driver.pooling.local.connections", "1",
    "--schema.keyspace", session.getKeyspace().get().asInternal(),
    "--schema.query",
    "insert into table_ttl_timestamp (key, value, loaded_at) values (?, ?, now()) using ttl ? and timestamp ?",
    "--schema.mapping", "*=*, created_at = __timestamp, time_to_live = __ttl");
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
assertTTLAndTimestamp();
assertThat(logs)
    .hasMessageContaining("The special __ttl mapping token has been deprecated")
    .hasMessageContaining("The special __timestamp mapping token has been deprecated");
logs.clear();
}
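The custom query binds four positional placeholders: key and value fill (?, ?, now()), while the fields mapped to __ttl and __timestamp fill USING TTL ? AND TIMESTAMP ?. A hand-prepared equivalent would look roughly like this sketch (a hypothetical illustration using the Java driver directly; the method and parameter names are not from dsbulk):

import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.cql.PreparedStatement;

class PositionalTtlTimestampSketch {
  // Hypothetical illustration of the positional binding the custom query relies on:
  // key and value fill (?, ?, now()), then the TTL (int, seconds) and the
  // write timestamp (long, microseconds since the epoch) fill USING TTL ? AND TIMESTAMP ?.
  static void insertWithTtlAndTimestamp(
      CqlSession session, int key, String value, int ttlSeconds, long timestampMicros) {
    PreparedStatement ps = session.prepare(
        "INSERT INTO table_ttl_timestamp (key, value, loaded_at) "
            + "VALUES (?, ?, now()) USING TTL ? AND TIMESTAMP ?");
    session.execute(ps.bind(key, value, ttlSeconds, timestampMicros));
  }
}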
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method unload_load_preserving_ttl_and_timestamp_custom_mapping:
@ParameterizedTest
@MethodSource("unload_load_preserving_ttl_and_timestamp")
void unload_load_preserving_ttl_and_timestamp_custom_mapping(
    DataType cqlType, Object value1, Object value2, String csv1, String csv2) throws IOException {
checkCqlTypeSupported(cqlType);
session.execute("DROP TABLE IF EXISTS preserve_ttl_timestamp");
session.execute(String.format("CREATE TABLE preserve_ttl_timestamp (pk1 int, pk2 int, cc1 int, cc2 int, v1 %s, v2 %s, PRIMARY KEY ((pk1, pk2), cc1, cc2))", cqlType.asCql(true, true), cqlType.asCql(true, true)));
TypeCodec<Object> codec = CodecRegistry.DEFAULT.codecFor(cqlType);
session.execute(String.format("BEGIN BATCH " + "INSERT INTO preserve_ttl_timestamp (pk1, pk2, cc1, cc2, v1) " + "VALUES (1, 2, 3, 4, %s) " + "USING TIMESTAMP 1111 AND TTL 111111; " + "INSERT INTO preserve_ttl_timestamp (pk1, pk2, cc1, cc2, v2) " + "VALUES (1, 2, 3, 4, %s) " + "USING TIMESTAMP 2222 AND TTL 222222; " + "APPLY BATCH", codec.format(value1), codec.format(value2)));
List<String> args = Lists.newArrayList(
    "unload",
    "--log.directory", quoteJson(logDir),
    "--connector.csv.url", quoteJson(unloadDir),
    "--connector.csv.header", "true",
    "--schema.keyspace", session.getKeyspace().get().asInternal(),
    "--schema.table", "preserve_ttl_timestamp",
    "--schema.mapping", "*=*, v1w=writetime(v1), v1t=ttl(v1), v2w=writetime(v2), v2t=ttl(v2)",
    "-timestamp", "true",
    "-ttl", "true");
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
Stream<String> line = FileUtils.readAllLinesInDirectoryAsStreamExcludingHeaders(unloadDir);
assertThat(line)
    .singleElement(InstanceOfAssertFactories.STRING)
    .contains("1,2,3,4,")
    .contains(csv1, csv2)
    .containsPattern("1970-01-01T00:00:00\\.001111Z")
    .containsPattern("111\\d\\d\\d,")
    .containsPattern("1970-01-01T00:00:00\\.002222Z")
    .containsPattern("222\\d\\d\\d");
assertThat(logs).doesNotHaveMessageContaining("Skipping timestamp preservation").doesNotHaveMessageContaining("Skipping TTL preservation");
FileUtils.deleteDirectory(logDir);
logs.clear();
session.execute("TRUNCATE preserve_ttl_timestamp");
args = Lists.newArrayList(
    "load",
    "--log.directory", quoteJson(logDir),
    "--connector.csv.url", quoteJson(unloadDir),
    "--connector.csv.header", "true",
    "--schema.keyspace", session.getKeyspace().get().asInternal(),
    "--schema.table", "preserve_ttl_timestamp",
    "--schema.mapping", "*=*, v1w=writetime(v1), v1t=ttl(v1), v2w=writetime(v2), v2t=ttl(v2)",
    "-timestamp", "true",
    "-ttl", "true");
status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_OK);
ResultSet rs = session.execute(
    "SELECT pk1, pk2, cc1, cc2, "
        + "v1, writetime(v1) AS v1w, ttl(v1) as v1t, "
        + "v2, writetime(v2) AS v2w, ttl(v2) as v2t "
        + "FROM preserve_ttl_timestamp "
        + "WHERE pk1 = 1 AND pk2 = 2 AND cc1 = 3 AND cc2 = 4");
Row row = rs.one();
assertThat(row).isNotNull();
assertThat(row.getInt("pk1")).isEqualTo(1);
assertThat(row.getInt("pk2")).isEqualTo(2);
assertThat(row.getInt("cc1")).isEqualTo(3);
assertThat(row.getInt("cc2")).isEqualTo(4);
assertThat(row.getObject("v1")).isEqualTo(value1);
assertThat(row.getObject("v2")).isEqualTo(value2);
assertThat(row.getLong("v1w")).isEqualTo(1111L);
assertThat(row.getLong("v2w")).isEqualTo(2222L);
assertThat(row.getInt("v1t")).isLessThanOrEqualTo(111111).isGreaterThan(111000);
assertThat(row.getInt("v2t")).isLessThanOrEqualTo(222222).isGreaterThan(222000);
assertThat(logs).doesNotHaveMessageContaining("Skipping timestamp preservation").doesNotHaveMessageContaining("Skipping TTL preservation");
}
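The TTL assertions accept a range rather than an exact value because a TTL starts counting down as soon as the row is written: by the time the data has been unloaded and loaded back, ttl(v1) has already decreased by the elapsed seconds. A minimal sketch of that arithmetic (class name and elapsed time are hypothetical):

public class TtlDrift {
  public static void main(String[] args) {
    int originalTtl = 111_111;   // seconds, from USING TTL 111111
    int elapsedSeconds = 42;     // hypothetical time spent unloading and reloading
    int remainingTtl = originalTtl - elapsedSeconds;
    // The test only requires 111000 < remainingTtl <= 111111,
    // i.e. less than ~111 seconds may elapse between the insert and the verification.
    System.out.println(remainingTtl > 111_000 && remainingTtl <= 111_111); // true
  }
}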