
Example 96 with DataStaxBulkLoader

Uses com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From class CSVConnectorEndToEndCCMIT, method unload_and_load_timestamp_ttl.

@Test
void unload_and_load_timestamp_ttl() throws IOException {
    session.execute("DROP TABLE IF EXISTS unload_and_load_timestamp_ttl");
    session.execute("CREATE TABLE unload_and_load_timestamp_ttl (key int PRIMARY KEY, value text)");
    session.execute("INSERT INTO unload_and_load_timestamp_ttl (key, value) VALUES (1, 'foo') " + "USING TIMESTAMP 123456789 AND TTL 123456789");
    List<String> args = Lists.newArrayList("unload", "--log.directory", quoteJson(logDir), "--connector.csv.url", quoteJson(unloadDir), "--connector.csv.header", "true", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.query", "SELECT key, value, writetime(value) AS timestamp, ttl(value) AS ttl " + "FROM unload_and_load_timestamp_ttl");
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    Stream<String> line = FileUtils.readAllLinesInDirectoryAsStreamExcludingHeaders(unloadDir);
    assertThat(line).singleElement(InstanceOfAssertFactories.STRING).contains("1,foo,").contains(CodecUtils.numberToInstant(123456789, MICROSECONDS, EPOCH).toString()).containsPattern(",\\d+");
    FileUtils.deleteDirectory(logDir);
    session.execute("TRUNCATE unload_and_load_timestamp_ttl");
    args = Lists.newArrayList("load", "--log.directory", quoteJson(logDir), "--connector.csv.url", quoteJson(unloadDir), "--connector.csv.header", "true", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.table", "unload_and_load_timestamp_ttl", "--schema.mapping", "* = * , timestamp = writetime(*), ttl = ttl(*)");
    status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    ResultSet rs = session.execute("SELECT key, value, writetime(value) AS timestamp, ttl(value) AS ttl " + "FROM unload_and_load_timestamp_ttl WHERE key = 1");
    Row row = rs.one();
    assertThat(row.getLong("timestamp")).isEqualTo(123456789L);
    assertThat(row.getInt("ttl")).isLessThanOrEqualTo(123456789);
}
Also used: ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus), ResultSet (com.datastax.oss.driver.api.core.cql.ResultSet), DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader), Row (com.datastax.oss.driver.api.core.cql.Row), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), Test (org.junit.jupiter.api.Test)
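A side note on the writetime assertion above: Cassandra's writetime() is reported in microseconds since the Unix epoch, so the unloaded CSV line is expected to contain the instant corresponding to 123456789 microseconds. A minimal standalone sketch of that conversion using plain java.time (an illustration, not dsbulk's CodecUtils implementation):

import java.time.Instant;
import java.time.temporal.ChronoUnit;

public class WritetimeToInstantDemo {

    public static void main(String[] args) {
        // writetime(value) is expressed in microseconds since the Unix epoch.
        long writetimeMicros = 123_456_789L;
        // Same conversion the test performs via CodecUtils.numberToInstant(123456789, MICROSECONDS, EPOCH):
        Instant instant = Instant.EPOCH.plus(writetimeMicros, ChronoUnit.MICROS);
        // Prints 1970-01-01T00:02:03.456789Z, the substring the unloaded CSV line must contain.
        System.out.println(instant);
    }
}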

Example 97 with DataStaxBulkLoader

Uses com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From class CSVConnectorEndToEndCCMIT, method function_mapped_to_primary_key.

/**
 * Test for DAT-326.
 */
@Test
void function_mapped_to_primary_key() {
    session.execute("DROP TABLE IF EXISTS dat326a");
    session.execute("CREATE TABLE IF NOT EXISTS dat326a (pk int, cc timeuuid, v int, PRIMARY KEY (pk, cc))");
    List<String> args = Lists.newArrayList("load", "--log.directory", quoteJson(logDir), "-header", "true", "--connector.csv.url", quoteJson(getClass().getResource("/function-pk.csv")), "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.table", "dat326a", "--schema.mapping", "now()=cc,*=*");
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
}
Also used: ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus), DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), Test (org.junit.jupiter.api.Test)
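The mapping "now()=cc,*=*" makes dsbulk fill the clustering column cc with the CQL now() function rather than with a CSV field, so the input file only needs to supply pk and v. A hypothetical follow-up check, not part of the test above and assuming the same session and AssertJ setup as the test class, could confirm that a time-based UUID was generated server-side:

    // Hypothetical extra assertion, reusing the test class's session and AssertJ imports:
    Row row = session.execute("SELECT cc FROM dat326a LIMIT 1").one();
    assertThat(row).isNotNull();
    // now() produces a version-1 (time-based) UUID, i.e. a timeuuid.
    assertThat(row.getUuid("cc").version()).isEqualTo(1);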

Example 98 with DataStaxBulkLoader

Uses com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From class CSVConnectorEndToEndCCMIT, method load_rebatch.

@Test
void load_rebatch() {
    session.execute("DROP TABLE IF EXISTS rebatch");
    session.execute("CREATE TABLE rebatch (pk int, cc int, v1 text, v2 text, PRIMARY KEY (pk, cc))");
    List<String> args = Lists.newArrayList("load", "--log.directory", quoteJson(logDir), "--connector.csv.url", quoteJson(ClassLoader.getSystemResource("rebatch.csv").toExternalForm()), "--connector.csv.header", "true", "--connector.csv.ignoreLeadingWhitespaces", "true", "--connector.csv.ignoreTrailingWhitespaces", "true", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.table", "rebatch", "--schema.mapping", "fk = pk, fc = cc, f1 = v1, f2 = v2, " + "timestamp1 = writetime(v1), timestamp2 = writetime(v2)," + "ttl1       = ttl(v1)      , ttl2       = ttl(v2)");
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    ResultSet rs = session.execute("SELECT pk, cc, " + "v1, writetime(v1) AS v1w, ttl(v1) as v1t, " + "v2, writetime(v2) AS v2w, ttl(v2) as v2t " + "FROM rebatch " + "WHERE pk = 1");
    List<Row> rows = rs.all();
    assertThat(rows).hasSize(20);
    long v1w = ZonedDateTime.parse("2022-02-10T00:00:01-03:00").toInstant().toEpochMilli() * 1000;
    long v2w = ZonedDateTime.parse("2022-03-10T00:00:01-03:00").toInstant().toEpochMilli() * 1000;
    for (int i = 0; i < rows.size(); i++) {
        Row row = rows.get(i);
        String v1 = String.valueOf((char) ('a' + i));
        String v2 = Strings.repeat(v1, 2);
        assertThat(row).isNotNull();
        assertThat(row.getInt("pk")).isEqualTo(1);
        assertThat(row.getInt("cc")).isEqualTo(1 + i);
        assertThat(row.getString("v1")).isEqualTo(v1);
        assertThat(row.getString("v2")).isEqualTo(v2);
        assertThat(row.getLong("v1w")).isEqualTo(v1w);
        assertThat(row.getLong("v2w")).isEqualTo(v2w);
        assertThat(row.getInt("v1t")).isLessThanOrEqualTo(1000);
        assertThat(row.getInt("v2t")).isLessThanOrEqualTo(2000);
        v1w += 1_000_000;
        v2w += 1_000_000;
    }
    // We have 20 records, each mapped to a generated BATCH query containing 2 child statements.
    // Since those batches are unwrapped, we expect 40 INSERT statements to be executed in total.
    // Since all records share the same partition key, we expect them all to be rebatched together.
    // With the default max batch size of 32, we expect a first batch of 32 statements
    // and a second batch with the remaining 8.
    assertThat(logs.getAllMessagesAsString()).contains("Batches: total: 2, size: 20.00 mean, 8 min, 32 max");
}
Also used: ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus), ResultSet (com.datastax.oss.driver.api.core.cql.ResultSet), DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader), Row (com.datastax.oss.driver.api.core.cql.Row), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), Test (org.junit.jupiter.api.Test)
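The expected log line follows directly from the arithmetic in the comment above; it can be reproduced in isolation with plain Java, independently of dsbulk:

import java.util.Locale;

public class RebatchMathDemo {

    public static void main(String[] args) {
        int records = 20;                  // rows in rebatch.csv
        int childStatementsPerRecord = 2;  // each record maps to a BATCH with 2 child INSERTs
        int maxBatchSize = 32;             // default max batch size mentioned in the comment above

        int totalStatements = records * childStatementsPerRecord;  // 40 after unwrapping
        int fullBatches = totalStatements / maxBatchSize;          // 1 batch of 32
        int remainder = totalStatements % maxBatchSize;            // 8 statements left over
        int totalBatches = fullBatches + (remainder > 0 ? 1 : 0);  // 2 batches in total
        double meanSize = (double) totalStatements / totalBatches; // 20.0

        // In this scenario the two batch sizes are 8 and 32, so min/max simply echo them.
        // Prints: Batches: total: 2, size: 20.00 mean, 8 min, 32 max
        System.out.printf(Locale.ROOT, "Batches: total: %d, size: %.2f mean, %d min, %d max%n",
                totalBatches, meanSize, remainder, maxBatchSize);
    }
}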

Example 99 with DataStaxBulkLoader

Uses com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From class CSVConnectorEndToEndCCMIT, method load_ttl_timestamp_now_in_query_and_mapping_positional_external_names.

@Test
void load_ttl_timestamp_now_in_query_and_mapping_positional_external_names() {
    session.execute("DROP TABLE IF EXISTS table_ttl_timestamp");
    session.execute("CREATE TABLE table_ttl_timestamp (key int PRIMARY KEY, value text, loaded_at timeuuid)");
    List<String> args = Lists.newArrayList("load", "--log.directory", quoteJson(logDir), "--connector.csv.ignoreLeadingWhitespaces", "true", "--connector.csv.ignoreTrailingWhitespaces", "true", "--connector.csv.url", ClassLoader.getSystemResource("ttl-timestamp.csv").toExternalForm(), "--driver.pooling.local.connections", "1", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.query", "insert into table_ttl_timestamp (key, value, loaded_at) values (?, ?, now()) using ttl ? and timestamp ?", "--schema.mapping", "*=*, created_at = __timestamp, time_to_live = __ttl");
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    assertTTLAndTimestamp();
    assertThat(logs).hasMessageContaining("The special __ttl mapping token has been deprecated").hasMessageContaining("The special __timestamp mapping token has been deprecated");
    logs.clear();
}
Also used: ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus), DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), Test (org.junit.jupiter.api.Test)
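Since the __timestamp and __ttl tokens are reported as deprecated, an equivalent load can presumably be expressed with the function-style mapping demonstrated in Example 96. The following is only a sketch of what that might look like for the same table, modeled on Examples 96 and 97 rather than taken from dsbulk's documentation or output; the mapping string and the use of now() = loaded_at are assumptions:

    // Sketch only (assumption): table-based load without the deprecated __timestamp/__ttl tokens.
    List<String> newArgs = Lists.newArrayList(
            "load",
            "--log.directory", quoteJson(logDir),
            "--connector.csv.url", ClassLoader.getSystemResource("ttl-timestamp.csv").toExternalForm(),
            "--schema.keyspace", session.getKeyspace().get().asInternal(),
            "--schema.table", "table_ttl_timestamp",
            // created_at/time_to_live feed writetime()/ttl(); now() fills loaded_at as in Example 97.
            "--schema.mapping", "* = *, created_at = writetime(*), time_to_live = ttl(*), now() = loaded_at");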

Example 100 with DataStaxBulkLoader

Uses com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From class CSVConnectorEndToEndCCMIT, method unload_load_preserving_ttl_and_timestamp_custom_mapping.

@ParameterizedTest
@MethodSource("unload_load_preserving_ttl_and_timestamp")
void unload_load_preserving_ttl_and_timestamp_custom_mapping(DataType cqlType, Object value1, Object value2, String csv1, String csv2) throws IOException {
    checkCqlTypeSupported(cqlType);
    session.execute("DROP TABLE IF EXISTS preserve_ttl_timestamp");
    session.execute(String.format("CREATE TABLE preserve_ttl_timestamp (pk1 int, pk2 int, cc1 int, cc2 int, v1 %s, v2 %s, PRIMARY KEY ((pk1, pk2), cc1, cc2))", cqlType.asCql(true, true), cqlType.asCql(true, true)));
    TypeCodec<Object> codec = CodecRegistry.DEFAULT.codecFor(cqlType);
    session.execute(String.format("BEGIN BATCH " + "INSERT INTO preserve_ttl_timestamp (pk1, pk2, cc1, cc2, v1) " + "VALUES (1, 2, 3, 4, %s) " + "USING TIMESTAMP 1111 AND TTL 111111; " + "INSERT INTO preserve_ttl_timestamp (pk1, pk2, cc1, cc2, v2) " + "VALUES (1, 2, 3, 4, %s) " + "USING TIMESTAMP 2222 AND TTL 222222; " + "APPLY BATCH", codec.format(value1), codec.format(value2)));
    List<String> args = Lists.newArrayList("unload", "--log.directory", quoteJson(logDir), "--connector.csv.url", quoteJson(unloadDir), "--connector.csv.header", "true", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.table", "preserve_ttl_timestamp", "--schema.mapping", "*=*, v1w=writetime(v1), v1t=ttl(v1), v2w=writetime(v2), v2t=ttl(v2)", "-timestamp", "true", "-ttl", "true");
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    Stream<String> line = FileUtils.readAllLinesInDirectoryAsStreamExcludingHeaders(unloadDir);
    assertThat(line).singleElement(InstanceOfAssertFactories.STRING).contains("1,2,3,4,").contains(csv1, csv2).containsPattern("1970-01-01T00:00:00\\.001111Z").containsPattern("111\\d\\d\\d,").containsPattern("1970-01-01T00:00:00\\.002222Z").containsPattern("222\\d\\d\\d");
    assertThat(logs).doesNotHaveMessageContaining("Skipping timestamp preservation").doesNotHaveMessageContaining("Skipping TTL preservation");
    FileUtils.deleteDirectory(logDir);
    logs.clear();
    session.execute("TRUNCATE preserve_ttl_timestamp");
    args = Lists.newArrayList("load", "--log.directory", quoteJson(logDir), "--connector.csv.url", quoteJson(unloadDir), "--connector.csv.header", "true", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.table", "preserve_ttl_timestamp", "--schema.mapping", "*=*, v1w=writetime(v1), v1t=ttl(v1), v2w=writetime(v2), v2t=ttl(v2)", "-timestamp", "true", "-ttl", "true");
    status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    ResultSet rs = session.execute("SELECT pk1, pk2, cc1, cc2, " + "v1, writetime(v1) AS v1w, ttl(v1) as v1t, " + "v2, writetime(v2) AS v2w, ttl(v2) as v2t " + "FROM preserve_ttl_timestamp " + "WHERE pk1 = 1 AND pk2 = 2 AND cc1 = 3 AND cc2 = 4");
    Row row = rs.one();
    assertThat(row).isNotNull();
    assertThat(row.getInt("pk1")).isEqualTo(1);
    assertThat(row.getInt("pk2")).isEqualTo(2);
    assertThat(row.getInt("cc1")).isEqualTo(3);
    assertThat(row.getInt("cc2")).isEqualTo(4);
    assertThat(row.getObject("v1")).isEqualTo(value1);
    assertThat(row.getObject("v2")).isEqualTo(value2);
    assertThat(row.getLong("v1w")).isEqualTo(1111L);
    assertThat(row.getLong("v2w")).isEqualTo(2222L);
    assertThat(row.getInt("v1t")).isLessThanOrEqualTo(111111).isGreaterThan(111000);
    assertThat(row.getInt("v2t")).isLessThanOrEqualTo(222222).isGreaterThan(222000);
    assertThat(logs).doesNotHaveMessageContaining("Skipping timestamp preservation").doesNotHaveMessageContaining("Skipping TTL preservation");
}
Also used: ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus), ResultSet (com.datastax.oss.driver.api.core.cql.ResultSet), DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader), Row (com.datastax.oss.driver.api.core.cql.Row), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), MethodSource (org.junit.jupiter.params.provider.MethodSource)
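Two details of the assertions above are worth spelling out: the USING TIMESTAMP values 1111 and 2222 are microseconds since the epoch, which is why those exact instant strings are expected in the CSV, and TTLs count down in real time, which is why the checks accept any value in (111000, 111111] and (222000, 222222]. A small standalone illustration of both points (plain Java, not dsbulk code; the elapsed time is an arbitrary example value):

import java.time.Instant;
import java.time.temporal.ChronoUnit;

public class PreservedTtlTimestampDemo {

    public static void main(String[] args) {
        // USING TIMESTAMP values are microseconds since the epoch:
        System.out.println(Instant.EPOCH.plus(1111, ChronoUnit.MICROS)); // 1970-01-01T00:00:00.001111Z
        System.out.println(Instant.EPOCH.plus(2222, ChronoUnit.MICROS)); // 1970-01-01T00:00:00.002222Z

        // TTLs decay while the test runs, so a column written with TTL 111111 is read back
        // slightly lower; the assertion accepts anything still above 111000.
        int originalTtl = 111_111;
        int elapsedSeconds = 42; // illustrative only
        int observedTtl = originalTtl - elapsedSeconds;
        System.out.println(observedTtl <= 111_111 && observedTtl > 111_000); // true
    }
}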

Aggregations

DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader): 165 usages
ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus): 165 usages
Test (org.junit.jupiter.api.Test): 142 usages
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 135 usages
ArrayList (java.util.ArrayList): 75 usages
Row (com.datastax.oss.driver.api.core.cql.Row): 30 usages
RequestPrime (com.datastax.oss.simulacron.common.cluster.RequestPrime): 30 usages
Prime (com.datastax.oss.simulacron.common.stubbing.Prime): 30 usages
CqlIdentifier (com.datastax.oss.driver.api.core.CqlIdentifier): 22 usages
ResultSet (com.datastax.oss.driver.api.core.cql.ResultSet): 14 usages
MethodSource (org.junit.jupiter.params.provider.MethodSource): 10 usages
Column (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column): 9 usages
Table (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table): 9 usages
URL (java.net.URL): 9 usages
EndToEndUtils.primeIpByCountryTable (com.datastax.oss.dsbulk.runner.tests.EndToEndUtils.primeIpByCountryTable): 8 usages
CsvSource (org.junit.jupiter.params.provider.CsvSource): 7 usages
Record (com.datastax.oss.dsbulk.connectors.api.Record): 6 usages
SimulacronUtils (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils): 5 usages
Keyspace (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace): 5 usages
Path (java.nio.file.Path): 5 usages