Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method full_load_unload_snappy.
/**
* Simple test case which attempts to load and unload data using ccm and compression (Snappy).
*/
@Test
void full_load_unload_snappy() throws Exception {
  assumeTrue(
      session.getContext().getProtocolVersion().getCode() != 5,
      "Snappy compression is not supported in protocol v5");
  List<String> args = new ArrayList<>();
  args.add("load");
  args.add("--driver.protocol.compression");
  args.add("SNAPPY");
  args.add("--connector.csv.url");
  args.add(quoteJson(CsvUtils.CSV_RECORDS_UNIQUE));
  args.add("--connector.csv.header");
  args.add("false");
  args.add("--schema.keyspace");
  args.add(session.getKeyspace().get().asInternal());
  args.add("--schema.table");
  args.add("ip_by_country");
  args.add("--schema.mapping");
  args.add(IP_BY_COUNTRY_MAPPING_INDEXED);
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateResultSetSize(24, "SELECT * FROM ip_by_country");
  validatePositionsFile(CsvUtils.CSV_RECORDS_UNIQUE, 24);
  FileUtils.deleteDirectory(logDir);
  args = new ArrayList<>();
  args.add("unload");
  args.add("--driver.protocol.compression");
  args.add("SNAPPY");
  args.add("--connector.csv.url");
  args.add(quoteJson(unloadDir));
  args.add("--connector.csv.header");
  args.add("false");
  args.add("--schema.keyspace");
  args.add(session.getKeyspace().get().asInternal());
  args.add("--schema.table");
  args.add("ip_by_country");
  args.add("--schema.mapping");
  args.add(IP_BY_COUNTRY_MAPPING_INDEXED);
  status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateOutputFiles(24, unloadDir);
}
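All of the examples on this page follow the same pattern: build the argument list, wrap it with the test harness helper addCommonSettings, run DataStaxBulkLoader, and check the returned ExitStatus. The sketch below distills that pattern outside the test harness; the keyspace, table, and file path are placeholders, and it assumes ExitStatus lives alongside DataStaxBulkLoader in the runner package and that the constructor accepts the argument array directly, as the calls above suggest.

import com.datastax.oss.dsbulk.runner.DataStaxBulkLoader;
import com.datastax.oss.dsbulk.runner.ExitStatus;
import java.util.ArrayList;
import java.util.List;

public class BulkLoadSketch {
  public static void main(String[] argv) {
    List<String> args = new ArrayList<>();
    args.add("load");
    args.add("--connector.csv.url");
    args.add("/tmp/ip_by_country.csv"); // placeholder path
    args.add("--connector.csv.header");
    args.add("false");
    args.add("--schema.keyspace");
    args.add("ks1"); // placeholder keyspace
    args.add("--schema.table");
    args.add("ip_by_country");
    // In the version exercised by these tests, run() returns an ExitStatus rather than an int.
    ExitStatus status = new DataStaxBulkLoader(args.toArray(new String[0])).run();
    if (status != ExitStatus.STATUS_OK) {
      throw new IllegalStateException("load failed: " + status);
    }
  }
}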
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method should_not_truncate_nor_round.
/**
* Test for DAT-224.
*/
@Test
void should_not_truncate_nor_round() throws Exception {
  session.execute("DROP TABLE IF EXISTS numbers");
  session.execute(
      "CREATE TABLE IF NOT EXISTS numbers (key varchar PRIMARY KEY, vdouble double, vdecimal decimal)");
  List<String> args = new ArrayList<>();
  args.add("load");
  args.add("--connector.csv.url");
  args.add(ClassLoader.getSystemResource("number.csv").toExternalForm());
  args.add("--connector.csv.header");
  args.add("true");
  args.add("--connector.csv.delimiter");
  args.add(";");
  args.add("--connector.csv.comment");
  args.add("#");
  args.add("--codec.overflowStrategy");
  args.add("REJECT");
  args.add("--schema.keyspace");
  args.add(session.getKeyspace().get().asInternal());
  args.add("--schema.table");
  args.add("numbers");
  args.add("--schema.mapping");
  args.add("*=*");
  ExitStatus loadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(loadStatus, STATUS_COMPLETED_WITH_ERRORS);
  validateExceptionsLog(
      1,
      "ArithmeticException: Cannot convert 0.12345678901234567890123456789 from BigDecimal to Double",
      "mapping-errors.log");
  checkNumbersWritten(OverflowStrategy.REJECT, UNNECESSARY, session);
  FileUtils.deleteDirectory(logDir);
  args = new ArrayList<>();
  args.add("unload");
  args.add("--connector.csv.url");
  args.add(quoteJson(unloadDir));
  args.add("--connector.csv.header");
  args.add("false");
  args.add("--connector.csv.delimiter");
  args.add(";");
  args.add("--connector.csv.maxConcurrentFiles");
  args.add("1");
  args.add("--codec.roundingStrategy");
  args.add("UNNECESSARY");
  args.add("--schema.keyspace");
  args.add(session.getKeyspace().get().asInternal());
  args.add("--schema.query");
  args.add("SELECT key, vdouble, vdecimal FROM numbers");
  ExitStatus unloadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(unloadStatus, STATUS_OK);
  checkNumbersRead(OverflowStrategy.REJECT, UNNECESSARY, false, unloadDir);
  FileUtils.deleteDirectory(logDir);
  // check we can load from the unloaded dataset
  args = new ArrayList<>();
  args.add("load");
  args.add("--connector.csv.url");
  args.add(quoteJson(unloadDir));
  args.add("--connector.csv.header");
  args.add("false");
  args.add("--connector.csv.delimiter");
  args.add(";");
  args.add("--codec.overflowStrategy");
  args.add("REJECT");
  args.add("--schema.keyspace");
  args.add(session.getKeyspace().get().asInternal());
  args.add("--schema.table");
  args.add("numbers");
  args.add("--schema.mapping");
  args.add("key,vdouble,vdecimal");
  loadStatus = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(loadStatus, STATUS_OK);
  checkNumbersWritten(OverflowStrategy.REJECT, UNNECESSARY, session);
}
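The rejected record above comes from a decimal literal that cannot be represented as a double without loss of precision, which is what --codec.overflowStrategy REJECT guards against. A small standalone illustration of the underlying precision loss (plain Java, not part of the test):

import java.math.BigDecimal;

public class OverflowIllustration {
  public static void main(String[] args) {
    BigDecimal exact = new BigDecimal("0.12345678901234567890123456789");
    double narrowed = exact.doubleValue();
    // The double cannot hold all the digits, so converting back yields a different value;
    // this is the kind of lossy narrowing that the REJECT overflow strategy refuses to perform.
    System.out.println(narrowed); // prints roughly 0.12345678901234568
    System.out.println(exact.compareTo(BigDecimal.valueOf(narrowed))); // non-zero: values differ
  }
}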
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method unload_load_preserving_ttl_and_timestamp_unsupported_types.
@ParameterizedTest
@MethodSource
void unload_load_preserving_ttl_and_timestamp_unsupported_types(DataType cqlType, Object value)
    throws IOException {
  checkCqlTypeSupported(cqlType);
  session.execute("DROP TABLE IF EXISTS preserve_ttl_timestamp");
  String typeAsCql = cqlType.asCql(true, true);
  session.execute(
      String.format(
          "CREATE TABLE preserve_ttl_timestamp (pk1 int, pk2 int, cc1 int, cc2 int, v1 %s, "
              + "PRIMARY KEY ((pk1, pk2), cc1, cc2))",
          typeAsCql));
  TypeCodec<Object> codec = CodecRegistry.DEFAULT.codecFor(cqlType);
  session.execute(
      String.format(
          "INSERT INTO preserve_ttl_timestamp (pk1, pk2, cc1, cc2, v1) "
              + "VALUES (1, 2, 3, 4, %s) "
              + "USING TIMESTAMP 1111 AND TTL 111111",
          codec.format(value)));
  List<String> args =
      Lists.newArrayList(
          "unload",
          "--log.directory",
          quoteJson(logDir),
          "--connector.csv.url",
          quoteJson(unloadDir),
          "--connector.csv.header",
          "true",
          "--schema.keyspace",
          session.getKeyspace().get().asInternal(),
          "--schema.table",
          "preserve_ttl_timestamp",
          "-timestamp",
          "true",
          "-ttl",
          "true");
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  Stream<String> line = FileUtils.readAllLinesInDirectoryAsStreamExcludingHeaders(unloadDir);
  assertThat(line).singleElement(InstanceOfAssertFactories.STRING).doesNotContain("1111", "2222");
  assertThat(logs)
      .hasMessageContaining(
          String.format(
              "Skipping timestamp preservation for column v1: this feature is not supported for CQL type %s",
              typeAsCql))
      .hasMessageContaining(
          String.format(
              "Skipping TTL preservation for column v1: this feature is not supported for CQL type %s",
              typeAsCql));
  FileUtils.deleteDirectory(logDir);
  logs.clear();
  session.execute("TRUNCATE preserve_ttl_timestamp");
  args =
      Lists.newArrayList(
          "load",
          "--log.directory",
          quoteJson(logDir),
          "--connector.csv.url",
          quoteJson(unloadDir),
          "--connector.csv.header",
          "true",
          "--schema.keyspace",
          session.getKeyspace().get().asInternal(),
          "--schema.table",
          "preserve_ttl_timestamp",
          "-timestamp",
          "true",
          "-ttl",
          "true");
  status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  ResultSet rs =
      session.execute(
          "SELECT pk1, pk2, cc1, cc2, v1 "
              + "FROM preserve_ttl_timestamp "
              + "WHERE pk1 = 1 AND pk2 = 2 AND cc1 = 3 AND cc2 = 4");
  Row row = rs.one();
  assertThat(row).isNotNull();
  assertThat(row.getInt("pk1")).isEqualTo(1);
  assertThat(row.getInt("pk2")).isEqualTo(2);
  assertThat(row.getInt("cc1")).isEqualTo(3);
  assertThat(row.getInt("cc2")).isEqualTo(4);
  assertThat(row.getObject("v1")).isEqualTo(value);
  assertThat(logs)
      .hasMessageContaining(
          String.format(
              "Skipping timestamp preservation for column v1: this feature is not supported for CQL type %s",
              typeAsCql))
      .hasMessageContaining(
          String.format(
              "Skipping TTL preservation for column v1: this feature is not supported for CQL type %s",
              typeAsCql));
}
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method load_ttl_timestamp_now_in_query_and_mapping_real_names_unset_values.
@Test
void load_ttl_timestamp_now_in_query_and_mapping_real_names_unset_values() {
  assumeTrue(
      protocolVersion.getCode() >= DefaultProtocolVersion.V4.getCode(),
      "Unset values are not compatible with protocol version < 4");
  session.execute("DROP TABLE IF EXISTS table_ttl_timestamp");
  session.execute(
      "CREATE TABLE table_ttl_timestamp (key int PRIMARY KEY, value text, loaded_at timeuuid)");
  List<String> args =
      Lists.newArrayList(
          "load",
          "--log.directory",
          quoteJson(logDir),
          "--connector.csv.ignoreLeadingWhitespaces",
          "true",
          "--connector.csv.ignoreTrailingWhitespaces",
          "true",
          "--connector.csv.url",
          ClassLoader.getSystemResource("ttl-timestamp-unset.csv").toExternalForm(),
          "--driver.pooling.local.connections",
          "1",
          "--schema.keyspace",
          session.getKeyspace().get().asInternal(),
          "--schema.query",
          "insert into table_ttl_timestamp (key, value, loaded_at) "
              + "values (:key, :value, now()) using ttl :t1 and timestamp :t2",
          "--schema.mapping",
          "*=*, created_at = t2, time_to_live = t1");
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  assertTTLAndTimestampUnsetValues();
}
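This test depends on protocol v4 "unset" semantics, hence the assumeTrue guard: when a CSV record has no value for :t1 or :t2, the corresponding bind variable is left unset and the write simply omits the TTL or timestamp instead of failing or writing a null. Below is a minimal driver-level sketch of the same idea; the keyspace, table, and column names are placeholders, not taken from the test.

import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.cql.BoundStatement;
import com.datastax.oss.driver.api.core.cql.PreparedStatement;

public class UnsetTtlSketch {
  static void insertWithoutTtl(CqlSession session) {
    PreparedStatement ps =
        session.prepare("INSERT INTO ks1.t (k, v) VALUES (:k, :v) USING TTL :t1");
    // :t1 is deliberately left unset; with protocol v4+ the row is written without a TTL,
    // rather than raising an error, which is the behavior the test above relies on.
    BoundStatement bs =
        ps.boundStatementBuilder().setInt("k", 1).setString("v", "x").build();
    session.execute(bs);
  }
}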
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in project dsbulk by datastax.
From the class CSVConnectorEndToEndCCMIT, method full_load_unload_counters_custom_query_named.
@Test
void full_load_unload_counters_custom_query_named() throws IOException {
  assumeTrue(
      (ccm.getClusterType() == Type.DSE && ccm.getVersion().compareTo(V5_1) >= 0)
          || (ccm.getClusterType() == OSS && ccm.getVersion().compareTo(V3_10) >= 0),
      "UPDATE SET += syntax is only supported in C* 3.10+ and DSE 5.1+");
  session.execute("DROP TABLE IF EXISTS counters");
  session.execute(
      "CREATE TABLE counters ("
          + "pk1 int, "
          + "\"PK2\" int, "
          + "\"C1\" counter, "
          + "c2 counter, "
          + "c3 counter, "
          + "PRIMARY KEY (pk1, \"PK2\"))");
  List<String> args = new ArrayList<>();
  args.add("load");
  args.add("--connector.csv.url");
  args.add(quoteJson(getClass().getResource("/counters.csv")));
  args.add("--connector.csv.header");
  args.add("false");
  args.add("--schema.keyspace");
  args.add(session.getKeyspace().get().asInternal());
  args.add("--schema.query");
  args.add(
      quoteJson(
          "UPDATE counters SET \"C1\" += :\"fieldC\", c2 = c2 + :\"fieldD\" "
              + "WHERE pk1 = :\"fieldA\" AND \"PK2\" = :\"fieldB\""));
  args.add("--schema.mapping");
  args.add("fieldA,fieldB,fieldC,fieldD");
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  Row row =
      session.execute("SELECT \"C1\", c2, c3 FROM counters WHERE pk1 = 1 AND \"PK2\" = 2").one();
  assertThat(row.getLong("\"C1\"")).isEqualTo(42L);
  // present in the file
  assertThat(row.getLong("c2")).isZero();
  // not present in the file
  assertThat(row.isNull("c3")).isTrue();
  FileUtils.deleteDirectory(logDir);
  args = new ArrayList<>();
  args.add("unload");
  args.add("--connector.csv.url");
  args.add(quoteJson(unloadDir));
  args.add("--connector.csv.header");
  args.add("false");
  args.add("--connector.csv.maxConcurrentFiles");
  args.add("1");
  args.add("--schema.keyspace");
  args.add(session.getKeyspace().get().asInternal());
  args.add("--schema.query");
  args.add(quoteJson("SELECT pk1, \"PK2\", \"C1\", c2, c3 FROM counters"));
  status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateOutputFiles(1, unloadDir);
  assertThat(FileUtils.readAllLinesInDirectoryAsStream(unloadDir)).containsExactly("1,2,42,0,");
}
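Counter columns cannot be inserted, only incremented, which is why this load goes through a custom UPDATE query and a named mapping (fieldA..fieldD name the positional CSV fields referenced by the bound variables). For context, the statement dsbulk ends up executing for the first CSV record is roughly equivalent to the hand-written update below; the values 1, 2, 42 and 0 are inferred from the assertions above, and the file contents themselves are an assumption.

import com.datastax.oss.driver.api.core.CqlSession;

public class CounterUpdateSketch {
  static void incrementCounters(CqlSession session) {
    // Classic counter-increment form; the test's query uses the shorter "+=" syntax,
    // which requires C* 3.10+ or DSE 5.1+.
    session.execute(
        "UPDATE counters SET \"C1\" = \"C1\" + 42, c2 = c2 + 0 "
            + "WHERE pk1 = 1 AND \"PK2\" = 2");
  }
}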