Example 36 with DataStaxBulkLoader

Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From the class CSVConnectorEndToEndCCMIT, method full_load_unload_snappy.

/**
 * Simple test case that attempts to load and unload data using CCM and Snappy compression.
 */
@Test
void full_load_unload_snappy() throws Exception {
    assumeTrue(session.getContext().getProtocolVersion().getCode() != 5, "Snappy compression is not supported in protocol v5");
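    // Load phase: ingest the CSV into ip_by_country with Snappy compression enabled.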
    List<String> args = new ArrayList<>();
    args.add("load");
    args.add("--driver.protocol.compression");
    args.add("SNAPPY");
    args.add("--connector.csv.url");
    args.add(quoteJson(CsvUtils.CSV_RECORDS_UNIQUE));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.table");
    args.add("ip_by_country");
    args.add("--schema.mapping");
    args.add(IP_BY_COUNTRY_MAPPING_INDEXED);
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateResultSetSize(24, "SELECT * FROM ip_by_country");
    validatePositionsFile(CsvUtils.CSV_RECORDS_UNIQUE, 24);
    FileUtils.deleteDirectory(logDir);
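    // Unload phase: export the table back to CSV, still using Snappy.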
    args = new ArrayList<>();
    args.add("unload");
    args.add("--driver.protocol.compression");
    args.add("SNAPPY");
    args.add("--connector.csv.url");
    args.add(quoteJson(unloadDir));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.table");
    args.add("ip_by_country");
    args.add("--schema.mapping");
    args.add(IP_BY_COUNTRY_MAPPING_INDEXED);
    status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateOutputFiles(24, unloadDir);
}
Also used: ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus), ArrayList (java.util.ArrayList), DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), Test (org.junit.jupiter.api.Test)
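
DataStaxBulkLoader accepts the whole command line as a plain String varargs array, so the same load can be run standalone. Below is a minimal sketch under assumed values: the CSV path, keyspace, and mapping are placeholders, and the test's addCommonSettings helper (which presumably injects shared connection settings) is omitted.

import com.datastax.oss.dsbulk.runner.DataStaxBulkLoader;
import com.datastax.oss.dsbulk.runner.ExitStatus;

public class SnappyLoadSketch {
    public static void main(String[] args) {
        ExitStatus status = new DataStaxBulkLoader(
                "load",
                "--driver.protocol.compression", "SNAPPY",
                "--connector.csv.url", "/tmp/ip-by-country.csv", // placeholder path
                "--connector.csv.header", "false",
                "--schema.keyspace", "test_ks", // placeholder keyspace
                "--schema.table", "ip_by_country",
                "--schema.mapping", "0=country_code,1=country_name" // placeholder mapping
        ).run();
        // STATUS_OK is the same constant the tests assert on.
        System.exit(status == ExitStatus.STATUS_OK ? 0 : 1);
    }
}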

Example 37 with DataStaxBulkLoader

Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From the class CSVConnectorEndToEndCCMIT, method should_not_truncate_nor_round.

/**
 * Test for DAT-224.
 */
@Test
void should_not_truncate_nor_round() throws Exception {
    session.execute("DROP TABLE IF EXISTS numbers");
    session.execute("CREATE TABLE IF NOT EXISTS numbers (key varchar PRIMARY KEY, vdouble double, vdecimal decimal)");
    List<String> args = new ArrayList<>();
    args.add("load");
    args.add("--connector.csv.url");
    args.add(ClassLoader.getSystemResource("number.csv").toExternalForm());
    args.add("--connector.csv.header");
    args.add("true");
    args.add("--connector.csv.delimiter");
    args.add(";");
    args.add("--connector.csv.comment");
    args.add("#");
    args.add("--codec.overflowStrategy");
    args.add("REJECT");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.table");
    args.add("numbers");
    args.add("--schema.mapping");
    args.add("*=*");
    ExitStatus loadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(loadStatus, STATUS_COMPLETED_WITH_ERRORS);
    validateExceptionsLog(1, "ArithmeticException: Cannot convert 0.12345678901234567890123456789 from BigDecimal to Double", "mapping-errors.log");
    checkNumbersWritten(OverflowStrategy.REJECT, UNNECESSARY, session);
    FileUtils.deleteDirectory(logDir);
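    // Unload phase: round-trip the table with UNNECESSARY rounding to prove no precision was lost.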
    args = new ArrayList<>();
    args.add("unload");
    args.add("--connector.csv.url");
    args.add(quoteJson(unloadDir));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--connector.csv.delimiter");
    args.add(";");
    args.add("--connector.csv.maxConcurrentFiles");
    args.add("1");
    args.add("--codec.roundingStrategy");
    args.add("UNNECESSARY");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.query");
    args.add("SELECT key, vdouble, vdecimal FROM numbers");
    ExitStatus unloadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(unloadStatus, STATUS_OK);
    checkNumbersRead(OverflowStrategy.REJECT, UNNECESSARY, false, unloadDir);
    FileUtils.deleteDirectory(logDir);
    // check we can load from the unloaded dataset
    args = new ArrayList<>();
    args.add("load");
    args.add("--connector.csv.url");
    args.add(quoteJson(unloadDir));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--connector.csv.delimiter");
    args.add(";");
    args.add("--codec.overflowStrategy");
    args.add("REJECT");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.table");
    args.add("numbers");
    args.add("--schema.mapping");
    args.add("key,vdouble,vdecimal");
    loadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(loadStatus, STATUS_OK);
    checkNumbersWritten(OverflowStrategy.REJECT, UNNECESSARY, session);
}
Also used: ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus), ArrayList (java.util.ArrayList), DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), Test (org.junit.jupiter.api.Test)
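
The rejected record shows the underlying problem: a BigDecimal with more significant digits than a double can represent cannot be narrowed without loss. Below is a minimal sketch of the round-trip check that a REJECT overflow strategy implies (not dsbulk's actual implementation; the message mirrors the log assertion above).

import java.math.BigDecimal;

public class OverflowCheckSketch {
    public static void main(String[] args) {
        BigDecimal value = new BigDecimal("0.12345678901234567890123456789");
        double narrowed = value.doubleValue(); // 0.12345678901234568: precision lost
        // Round-trip: if converting back does not compare equal, the value overflowed.
        if (new BigDecimal(Double.toString(narrowed)).compareTo(value) != 0) {
            throw new ArithmeticException(
                    "Cannot convert " + value + " from BigDecimal to Double");
        }
    }
}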

Example 38 with DataStaxBulkLoader

Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From the class CSVConnectorEndToEndCCMIT, method unload_load_preserving_ttl_and_timestamp_unsupported_types.

@ParameterizedTest
@MethodSource
void unload_load_preserving_ttl_and_timestamp_unsupported_types(DataType cqlType, Object value) throws IOException {
    checkCqlTypeSupported(cqlType);
    session.execute("DROP TABLE IF EXISTS preserve_ttl_timestamp");
    String typeAsCql = cqlType.asCql(true, true);
    session.execute(String.format("CREATE TABLE preserve_ttl_timestamp (pk1 int, pk2 int, cc1 int, cc2 int, v1 %s, PRIMARY KEY ((pk1, pk2), cc1, cc2))", typeAsCql));
    TypeCodec<Object> codec = CodecRegistry.DEFAULT.codecFor(cqlType);
    session.execute(String.format("INSERT INTO preserve_ttl_timestamp (pk1, pk2, cc1, cc2, v1) " + "VALUES (1, 2, 3, 4, %s) " + "USING TIMESTAMP 1111 AND TTL 111111", codec.format(value)));
    List<String> args = Lists.newArrayList("unload", "--log.directory", quoteJson(logDir), "--connector.csv.url", quoteJson(unloadDir), "--connector.csv.header", "true", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.table", "preserve_ttl_timestamp", "-timestamp", "true", "-ttl", "true");
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    Stream<String> line = FileUtils.readAllLinesInDirectoryAsStreamExcludingHeaders(unloadDir);
    assertThat(line).singleElement(InstanceOfAssertFactories.STRING).doesNotContain("1111", "2222");
    assertThat(logs)
            .hasMessageContaining(String.format("Skipping timestamp preservation for column v1: this feature is not supported for CQL type %s", typeAsCql))
            .hasMessageContaining(String.format("Skipping TTL preservation for column v1: this feature is not supported for CQL type %s", typeAsCql));
    FileUtils.deleteDirectory(logDir);
    logs.clear();
    session.execute("TRUNCATE preserve_ttl_timestamp");
    args = Lists.newArrayList("load", "--log.directory", quoteJson(logDir), "--connector.csv.url", quoteJson(unloadDir), "--connector.csv.header", "true", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.table", "preserve_ttl_timestamp", "-timestamp", "true", "-ttl", "true");
    status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    ResultSet rs = session.execute("SELECT pk1, pk2, cc1, cc2, v1 " + "FROM preserve_ttl_timestamp " + "WHERE pk1 = 1 AND pk2 = 2 AND cc1 = 3 AND cc2 = 4");
    Row row = rs.one();
    assertThat(row).isNotNull();
    assertThat(row.getInt("pk1")).isEqualTo(1);
    assertThat(row.getInt("pk2")).isEqualTo(2);
    assertThat(row.getInt("cc1")).isEqualTo(3);
    assertThat(row.getInt("cc2")).isEqualTo(4);
    assertThat(row.getObject("v1")).isEqualTo(value);
    assertThat(logs)
            .hasMessageContaining(String.format("Skipping timestamp preservation for column v1: this feature is not supported for CQL type %s", typeAsCql))
            .hasMessageContaining(String.format("Skipping TTL preservation for column v1: this feature is not supported for CQL type %s", typeAsCql));
}
Also used: ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus), ResultSet (com.datastax.oss.driver.api.core.cql.ResultSet), DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader), Row (com.datastax.oss.driver.api.core.cql.Row), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), MethodSource (org.junit.jupiter.params.provider.MethodSource)
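
TTL/timestamp preservation relies on the CQL writetime() and ttl() functions, which are not defined for multi-cell columns such as non-frozen collections; that matches the skip messages asserted above. Below is a minimal driver-side sketch for inspecting them by hand (the contact point defaults to localhost, and the keyspace is a placeholder).

import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.cql.Row;

public class WritetimeTtlSketch {
    public static void main(String[] args) {
        try (CqlSession session = CqlSession.builder()
                .withKeyspace("test_ks") // placeholder keyspace
                .build()) {
            // Read back the write timestamp and TTL that were preserved for v1.
            Row row = session.execute(
                    "SELECT writetime(v1) AS wt, ttl(v1) AS t FROM preserve_ttl_timestamp "
                            + "WHERE pk1 = 1 AND pk2 = 2 AND cc1 = 3 AND cc2 = 4").one();
            if (row != null) {
                System.out.printf("writetime=%d ttl=%d%n", row.getLong("wt"), row.getInt("t"));
            }
        }
    }
}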

Example 39 with DataStaxBulkLoader

Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From the class CSVConnectorEndToEndCCMIT, method load_ttl_timestamp_now_in_query_and_mapping_real_names_unset_values.

@Test
void load_ttl_timestamp_now_in_query_and_mapping_real_names_unset_values() {
    assumeTrue(protocolVersion.getCode() >= DefaultProtocolVersion.V4.getCode(), "Unset values are not compatible with protocol version < 4");
    session.execute("DROP TABLE IF EXISTS table_ttl_timestamp");
    session.execute("CREATE TABLE table_ttl_timestamp (key int PRIMARY KEY, value text, loaded_at timeuuid)");
    List<String> args = Lists.newArrayList("load", "--log.directory", quoteJson(logDir), "--connector.csv.ignoreLeadingWhitespaces", "true", "--connector.csv.ignoreTrailingWhitespaces", "true", "--connector.csv.url", ClassLoader.getSystemResource("ttl-timestamp-unset.csv").toExternalForm(), "--driver.pooling.local.connections", "1", "--schema.keyspace", session.getKeyspace().get().asInternal(), "--schema.query", "insert into table_ttl_timestamp (key, value, loaded_at) values (:key, :value, now()) using ttl :t1 and timestamp :t2", "--schema.mapping", "*=*, created_at = t2, time_to_live = t1");
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    assertTTLAndTimestampUnsetValues();
}
Also used: ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus), DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), Test (org.junit.jupiter.api.Test)
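
The protocol guard matters because "unset" is a v4 addition: a bind variable that is never bound is transmitted as unset and the server ignores it, rather than writing a null (and a tombstone). Below is a minimal sketch of the same behavior at the driver level, with placeholder keyspace and values; in Java driver 4.x, bind variables start out unset unless explicitly set.

import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.cql.BoundStatement;
import com.datastax.oss.driver.api.core.cql.PreparedStatement;

public class UnsetValuesSketch {
    public static void main(String[] args) {
        try (CqlSession session = CqlSession.builder()
                .withKeyspace("test_ks") // placeholder keyspace
                .build()) {
            PreparedStatement ps = session.prepare(
                    "INSERT INTO table_ttl_timestamp (key, value, loaded_at) "
                            + "VALUES (:key, :value, now()) USING TTL :t1 AND TIMESTAMP :t2");
            // :t1 and :t2 are deliberately left unset: the server applies its defaults,
            // just as dsbulk does for empty TTL/timestamp fields in the CSV.
            BoundStatement bs = ps.boundStatementBuilder()
                    .setInt("key", 1) // placeholder values
                    .setString("value", "foo")
                    .build();
            session.execute(bs);
        }
    }
}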

Example 40 with DataStaxBulkLoader

Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.

From the class CSVConnectorEndToEndCCMIT, method full_load_unload_counters_custom_query_named.

@Test
void full_load_unload_counters_custom_query_named() throws IOException {
    assumeTrue((ccm.getClusterType() == Type.DSE && ccm.getVersion().compareTo(V5_1) >= 0) || (ccm.getClusterType() == OSS && ccm.getVersion().compareTo(V3_10) >= 0), "UPDATE SET += syntax is only supported in C* 3.10+ and DSE 5.1+");
    session.execute("DROP TABLE IF EXISTS counters");
    session.execute("CREATE TABLE counters (" + "pk1 int, " + "\"PK2\" int, " + "\"C1\" counter, " + "c2 counter, " + "c3 counter, " + "PRIMARY KEY (pk1, \"PK2\"))");
    List<String> args = new ArrayList<>();
    args.add("load");
    args.add("--connector.csv.url");
    args.add(quoteJson(getClass().getResource("/counters.csv")));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.query");
    args.add(quoteJson("UPDATE counters SET \"C1\" += :\"fieldC\", c2 = c2 + :\"fieldD\" WHERE pk1 = :\"fieldA\" AND \"PK2\" = :\"fieldB\""));
    args.add("--schema.mapping");
    args.add("fieldA,fieldB,fieldC,fieldD");
    ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    Row row = session.execute("SELECT \"C1\", c2, c3 FROM counters WHERE pk1 = 1 AND \"PK2\" = 2").one();
    assertThat(row.getLong("\"C1\"")).isEqualTo(42L);
    // present in the file
    assertThat(row.getLong("c2")).isZero();
    // not present in the file
    assertThat(row.isNull("c3")).isTrue();
    FileUtils.deleteDirectory(logDir);
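    // Unload phase: export the counter values with a custom SELECT.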
    args = new ArrayList<>();
    args.add("unload");
    args.add("--connector.csv.url");
    args.add(quoteJson(unloadDir));
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--connector.csv.maxConcurrentFiles");
    args.add("1");
    args.add("--schema.keyspace");
    args.add(session.getKeyspace().get().asInternal());
    args.add("--schema.query");
    args.add(quoteJson("SELECT pk1, \"PK2\", \"C1\", c2, c3 FROM counters"));
    status = new DataStaxBulkLoader(addCommonSettings(args)).run();
    assertStatus(status, STATUS_OK);
    validateOutputFiles(1, unloadDir);
    assertThat(FileUtils.readAllLinesInDirectoryAsStream(unloadDir)).containsExactly("1,2,42,0,");
}
Also used: ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus), ArrayList (java.util.ArrayList), DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader), Row (com.datastax.oss.driver.api.core.cql.Row), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), Test (org.junit.jupiter.api.Test)
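
The assumeTrue guard exists because += on counter columns is relatively recent CQL syntax; older clusters only accept the classic long form. Below is a minimal sketch showing both forms against the counters table created above (the keyspace and increment values are placeholders).

import com.datastax.oss.driver.api.core.CqlSession;

public class CounterUpdateSketch {
    public static void main(String[] args) {
        try (CqlSession session = CqlSession.builder()
                .withKeyspace("test_ks") // placeholder keyspace
                .build()) {
            // Shorthand form, C* 3.10+ / DSE 5.1+ only:
            session.execute("UPDATE counters SET \"C1\" += 42 WHERE pk1 = 1 AND \"PK2\" = 2");
            // Classic form on another counter column, accepted by older versions too:
            session.execute("UPDATE counters SET c2 = c2 + 1 WHERE pk1 = 1 AND \"PK2\" = 2");
        }
    }
}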

Aggregations

DataStaxBulkLoader (com.datastax.oss.dsbulk.runner.DataStaxBulkLoader): 165
ExitStatus (com.datastax.oss.dsbulk.runner.ExitStatus): 165
Test (org.junit.jupiter.api.Test): 142
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 135
ArrayList (java.util.ArrayList): 75
Row (com.datastax.oss.driver.api.core.cql.Row): 30
RequestPrime (com.datastax.oss.simulacron.common.cluster.RequestPrime): 30
Prime (com.datastax.oss.simulacron.common.stubbing.Prime): 30
CqlIdentifier (com.datastax.oss.driver.api.core.CqlIdentifier): 22
ResultSet (com.datastax.oss.driver.api.core.cql.ResultSet): 14
MethodSource (org.junit.jupiter.params.provider.MethodSource): 10
Column (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Column): 9
Table (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Table): 9
URL (java.net.URL): 9
EndToEndUtils.primeIpByCountryTable (com.datastax.oss.dsbulk.runner.tests.EndToEndUtils.primeIpByCountryTable): 8
CsvSource (org.junit.jupiter.params.provider.CsvSource): 7
Record (com.datastax.oss.dsbulk.connectors.api.Record): 6
SimulacronUtils (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils): 5
Keyspace (com.datastax.oss.dsbulk.tests.simulacron.SimulacronUtils.Keyspace): 5
Path (java.nio.file.Path): 5