Example 91 with DataStaxBulkLoader

Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From class CSVConnectorEndToEndCCMIT, method full_load_unload_using_urlfile.

@Test
void full_load_unload_using_urlfile() throws Exception {
    List<String> args = new ArrayList<>();
    args.add("load");
    args.add("--connector.csv.urlfile");
    args.add(quoteJson(urlFile));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.table");
    args.add("ip_by_country");
    args.add("--schema.mapping");
    args.add(IP_BY_COUNTRY_MAPPING_INDEXED);
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateResultSetSize(24, "SELECT * FROM ip_by_country");
    FileUtils.deleteDirectory(logDir);
    args = new ArrayList<>();
    args.add("unload");
    args.add("--connector.csv.url");
    args.add(quoteJson(unloadDir));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.table");
    args.add("ip_by_country");
    args.add("--schema.mapping");
    args.add(IP_BY_COUNTRY_MAPPING_INDEXED);
    status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateOutputFiles(24, unloadDir);
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) ArrayList(java.util.ArrayList) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
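
Every example on this page follows the same pattern: build the CLI arguments as a list of strings, pass them to the DataStaxBulkLoader constructor, and inspect the ExitStatus returned by run(). The following is a minimal sketch of that pattern outside the test harness, not code from the dsbulk sources: the input path and keyspace name are placeholders, the connection settings that the tests append via addCommonSettings(args) are omitted, and STATUS_OK is assumed to be the ExitStatus constant that the tests import statically.

import com.datastax.oss.dsbulk.runner.DataStaxBulkLoader;
import com.datastax.oss.dsbulk.runner.ExitStatus;
import java.util.ArrayList;
import java.util.List;

public class BulkLoadSketch {

    public static void main(String[] ignored) {
        List<String> args = new ArrayList<>();
        args.add("load");
        args.add("--connector.csv.url");
        // placeholder input file
        args.add("/tmp/ip_by_country.csv");
        args.add("--connector.csv.header");
        // with a header and no explicit mapping, header names are matched to column names
        args.add("true");
        args.add("--schema.keyspace");
        // placeholder keyspace
        args.add("my_ks");
        args.add("--schema.table");
        args.add("ip_by_country");
        // contact points, port and credentials are omitted here;
        // the integration tests append them via addCommonSettings(args)
        ExitStatus status = new DataStaxBulkLoader(args.toArray(new String[0])).run();
        if (status != ExitStatus.STATUS_OK) {
            System.err.println("dsbulk load finished with status " + status);
        }
    }
}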

Example 92 with DataStaxBulkLoader

Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From class CSVConnectorEndToEndCCMIT, method full_load_unload_lz4.

/**
 * Simple test case which attempts to load and unload data using ccm and compression (LZ4).
 */
@Test
void full_load_unload_lz4() throws Exception {
    List<String> args = new ArrayList<>();
    args.add("load");
    args.add("--driver.protocol.compression");
    args.add("LZ4");
    args.add("--connector.csv.url");
    args.add(quoteJson(CsvUtils.CSV_RECORDS_UNIQUE));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.table");
    args.add("ip_by_country");
    args.add("--schema.mapping");
    args.add(IP_BY_COUNTRY_MAPPING_INDEXED);
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateResultSetSize(24, "SELECT * FROM ip_by_country");
    validatePositionsFile(CsvUtils.CSV_RECORDS_UNIQUE, 24);
    FileUtils.deleteDirectory(logDir);
    args = new ArrayList<>();
    args.add("unload");
    args.add("--driver.protocol.compression");
    args.add("LZ4");
    args.add("--connector.csv.url");
    args.add(quoteJson(unloadDir));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.table");
    args.add("ip_by_country");
    args.add("--schema.mapping");
    args.add(IP_BY_COUNTRY_MAPPING_INDEXED);
    status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateOutputFiles(24, unloadDir);
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) ArrayList(java.util.ArrayList) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Example 93 with DataStaxBulkLoader

Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From class CSVConnectorEndToEndCCMIT, method full_load_unload_with_spaces.

/**
 * Attempt to load and unload data using ccm for a keyspace and table that are case-sensitive, and
 * with a column name containing spaces. The source data also has a header row containing spaces
 * and contains a multi-line value.
 */
@Test
void full_load_unload_with_spaces() throws Exception {
    List<String> args = new ArrayList<>();
    args.add("load");
    args.add("-url");
    args.add(quoteJson(CsvUtils.CSV_RECORDS_WITH_SPACES));
    args.add("--schema.mapping");
    args.add(quoteJson("key=key,\"my source\"=\"my destination\""));
    args.add("-header");
    args.add("true");
    args.add("-k");
    args.add("MYKS");
    args.add("-t");
    args.add("WITH_SPACES");
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateResultSetSize(1, "SELECT * FROM \"MYKS\".\"WITH_SPACES\"");
    validatePositionsFile(CsvUtils.CSV_RECORDS_WITH_SPACES, 1);
    FileUtils.deleteDirectory(logDir);
    args = new ArrayList<>();
    args.add("unload");
    args.add("-url");
    args.add(quoteJson(unloadDir));
    args.add("--connector.csv.maxConcurrentFiles");
    args.add("1");
    args.add("--schema.mapping");
    args.add(quoteJson("key=key,\"my source\"=\"my destination\""));
    args.add("-header");
    args.add("true");
    args.add("-k");
    args.add("MYKS");
    args.add("-t");
    args.add("WITH_SPACES");
    status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateOutputFiles(3, unloadDir);
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) ArrayList(java.util.ArrayList) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Example 94 with DataStaxBulkLoader

Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From class CSVConnectorEndToEndCCMIT, method load_constant_writetime_ttl.

@Test
void load_constant_writetime_ttl() {
    session.execute("DROP TABLE IF EXISTS load_constant_writetime_ttl");
    session.execute("CREATE TABLE load_constant_writetime_ttl (pk int PRIMARY KEY, v1 int, v2 int)");
    MockConnector.mockReads(RecordUtils.mappedCSV("pk", "1"));
    List<String> args = new ArrayList<>();
    args.add("load");
    args.add("--connector.name");
    args.add("mock");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.table");
    args.add("load_constant_writetime_ttl");
    args.add("--schema.mapping");
    args.add("pk=pk,(int)123=v1,(int)456=v2,(int)1000=ttl(*),(timestamp)'2022-02-02T22:22:22Z'=writetime(*)");
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    List<Row> rows = session.execute("SELECT v1, v2, " + "ttl(v1) as ttlv1, ttl(v2) as ttlv2, " + "writetime(v1) as wtv1, writetime(v2) as wtv2 " + "FROM load_constant_writetime_ttl WHERE pk = 1").all();
    assertThat(rows).hasSize(1);
    assertThat(rows.get(0).getInt("v1")).isEqualTo(123);
    assertThat(rows.get(0).getInt("v2")).isEqualTo(456);
    assertThat(rows.get(0).getInt("ttlv1")).isNotZero();
    assertThat(rows.get(0).getInt("ttlv2")).isNotZero();
    Instant i = Instant.parse("2022-02-02T22:22:22Z");
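    // writetime() values are expressed in microseconds since the epoch, hence toEpochMilli() * 1000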
    assertThat(rows.get(0).getLong("wtv1")).isEqualTo(i.toEpochMilli() * 1000);
    assertThat(rows.get(0).getLong("wtv2")).isEqualTo(i.toEpochMilli() * 1000);
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) Instant(java.time.Instant) ArrayList(java.util.ArrayList) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) Row(com.datastax.oss.driver.api.core.cql.Row) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Example 95 with DataStaxBulkLoader

Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From class CSVConnectorEndToEndCCMIT, method temporal_roundtrip.

/**
 * Test for DAT-236.
 */
@Test
void temporal_roundtrip() throws Exception {
    assumeTrue(ccm.getCassandraVersion().compareTo(V3) >= 0, "CQL type date is not compatible with C* < 3.0");
    session.execute("DROP TABLE IF EXISTS temporals");
    session.execute("CREATE TABLE IF NOT EXISTS temporals (key int PRIMARY KEY, vdate date, vtime time, vtimestamp timestamp)");
    List<String> args = new ArrayList<>();
    args.add("load");
    args.add("--connector.csv.ignoreLeadingWhitespaces");
    args.add("true");
    args.add("--connector.csv.ignoreTrailingWhitespaces");
    args.add("true");
    args.add("--connector.csv.url");
    args.add(ClassLoader.getSystemResource("temporal.csv").toExternalForm());
    args.add("--connector.csv.header");
    args.add("true");
    args.add("--codec.locale");
    args.add("fr_FR");
    args.add("--codec.timeZone");
    args.add("Europe/Paris");
    args.add("--codec.date");
    args.add("cccc, d MMMM uuuu");
    args.add("--codec.time");
    args.add("HHmmssSSS");
    args.add("--codec.timestamp");
    args.add("ISO_ZONED_DATE_TIME");
    args.add("--codec.unit");
    args.add("SECONDS");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.table");
    args.add("temporals");
    args.add("--schema.mapping");
    args.add("*=*");
    ExitStatus loadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(loadStatus, STATUS_OK);
    checkTemporalsWritten(session);
    FileUtils.deleteDirectory(logDir);
    args = new ArrayList<>();
    args.add("unload");
    args.add("--connector.csv.url");
    args.add(quoteJson(unloadDir));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--connector.csv.delimiter");
    args.add(";");
    args.add("--codec.locale");
    args.add("fr_FR");
    args.add("--codec.timeZone");
    args.add("Europe/Paris");
    args.add("--codec.date");
    args.add("cccc, d MMMM uuuu");
    args.add("--codec.time");
    args.add("HHmmssSSS");
    args.add("--codec.timestamp");
    args.add("ISO_ZONED_DATE_TIME");
    args.add("--codec.unit");
    args.add("SECONDS");
    args.add("--connector.csv.maxConcurrentFiles");
    args.add("1");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.query");
    args.add("SELECT key, vdate, vtime, vtimestamp FROM temporals");
    ExitStatus unloadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(unloadStatus, STATUS_OK);
    checkTemporalsRead(unloadDir);
    FileUtils.deleteDirectory(logDir);
    // check we can load from the unloaded dataset
    args = new ArrayList<>();
    args.add("load");
    args.add("--connector.csv.url");
    args.add(quoteJson(unloadDir));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--connector.csv.delimiter");
    args.add(";");
    args.add("--codec.locale");
    args.add("fr_FR");
    args.add("--codec.timeZone");
    args.add("Europe/Paris");
    args.add("--codec.date");
    args.add("cccc, d MMMM uuuu");
    args.add("--codec.time");
    args.add("HHmmssSSS");
    args.add("--codec.timestamp");
    args.add("ISO_ZONED_DATE_TIME");
    args.add("--codec.unit");
    args.add("SECONDS");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.table");
    args.add("temporals");
    args.add("--schema.mapping");
    args.add("key, vdate, vtime, vtimestamp");
    loadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(loadStatus, STATUS_OK);
    checkTemporalsWritten(session);
}
Also used : ExitStatus(com.datastax.oss.dsbulk.runner.ExitStatus) ArrayList(java.util.ArrayList) DataStaxBulkLoader(com.datastax.oss.dsbulk.runner.DataStaxBulkLoader) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)

Aggregations

DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader): 165
ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus): 165
Test (org.junit.jupiter.api.Test): 142
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 135
ArrayList (java.util.ArrayList): 75
Row (com.datastax.oss.driver.api.core.cql.Row): 30
RequestPrime (com.datastax.oss.simulacron.common.cluster.RequestPrime): 30
Prime (com.datastax.oss.simulacron.common.stubbing.Prime): 30
CqlIdentifier (com.datastax.oss.driver.api.core.CqlIdentifier): 22
ResultSet (com.datastax.oss.driver.api.core.cql.ResultSet): 14
MethodSource (org.junit.jupiter.params.provider.MethodSource): 10
Column (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column): 9
Table (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table): 9
URL (java.net.URL): 9
EndToEndUtils.primeIpByCountryTable (com.datastax.oss.dsbulk.runner.tests.EndToEndUtils.primeIpByCountryTable): 8
CsvSource (org.junit.jupiter.params.provider.CsvSource): 7
Record (com.datastax.oss.dsbulk.connectors.api.Record): 6
SimulacronUtils (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils): 5
Keyspace (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace): 5
Path (java.nio.file.Path): 5