Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in the project dsbulk by DataStax: class EndToEndCloudIT, method performLoad.
/**
 * Runs a DSBulk load of the unique CSV records into {@code ks1.ip_by_country} and verifies that
 * all 24 rows were written, the positions file is consistent, and then cleans up the log dir.
 *
 * @param specificArgs extra command-line arguments appended after the base load arguments.
 * @throws IOException if the positions file cannot be validated or the log directory deleted.
 * @throws URISyntaxException if the CSV resource URL is malformed.
 */
private void performLoad(String... specificArgs) throws IOException, URISyntaxException {
  List<String> loadArgs =
      Lists.newArrayList(
          "load",
          "--connector.csv.url",
          StringUtils.quoteJson(CsvUtils.CSV_RECORDS_UNIQUE),
          "--connector.csv.header",
          "false",
          "--schema.keyspace",
          "ks1",
          "--schema.table",
          "ip_by_country",
          "--schema.mapping",
          IP_BY_COUNTRY_MAPPING_INDEXED);
  loadArgs.addAll(Arrays.asList(specificArgs));
  loadArgs.addAll(commonArgs());
  ExitStatus status = new DataStaxBulkLoader(loadArgs.toArray(new String[0])).run();
  assertStatus(status, STATUS_OK);
  // One row per CSV record is expected in the target table.
  List<Row> rows = session.execute("SELECT * FROM ip_by_country").all();
  assertThat(rows).hasSize(24);
  validatePositionsFile(CsvUtils.CSV_RECORDS_UNIQUE, 24);
  FileUtils.deleteDirectory(logDir);
}
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in the project dsbulk by DataStax: class CSVEndToEndSimulacronIT, method error_load_percentage.
/**
 * Verifies that a load aborts with {@code STATUS_ABORTED_TOO_MANY_ERRORS} when the
 * percentage-based error threshold ({@code --log.maxErrors 1%}) is exceeded, and that the abort
 * reason is reported in the logs. Batching is disabled so each record maps to one request.
 */
@Test
void error_load_percentage() {
  primeIpByCountryTable(simulacron);
  RequestPrime insertPrime = createSimpleParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY);
  simulacron.prime(new Prime(insertPrime));
  String[] args = {
    "load",
    "--log.maxErrors",
    "1%",
    "-header",
    "false",
    "--connector.csv.url",
    StringUtils.quoteJson(CsvUtils.CSV_RECORDS_PARTIAL_BAD_LONG),
    "--schema.keyspace",
    "ks1",
    "--schema.query",
    INSERT_INTO_IP_BY_COUNTRY,
    "--schema.mapping",
    IP_BY_COUNTRY_MAPPING_INDEXED,
    "--batch.mode",
    "DISABLED"
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_ABORTED_TOO_MANY_ERRORS);
  assertThat(logs.getAllMessagesAsString())
      .contains("aborted: Too many errors, the maximum allowed is 1%");
}
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in the project dsbulk by DataStax: class CSVEndToEndSimulacronIT, method full_load_multiple_urls.
/**
 * Verifies that a load driven by {@code --connector.csv.urlfile} (a file listing multiple input
 * URLs) succeeds, emits the expected record/batch/write totals at verbosity 2, and issues exactly
 * 24 INSERT requests at consistency LOCAL_ONE.
 *
 * @param urlfile path to the file containing the list of input URLs, supplied by the provider.
 */
@ParameterizedTest
@MethodSource("multipleUrlsProvider")
void full_load_multiple_urls(Path urlfile) {
  primeIpByCountryTable(simulacron);
  RequestPrime insertPrime = createSimpleParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY);
  simulacron.prime(new Prime(insertPrime));
  String[] args = {
    "load",
    "--log.verbosity",
    "2",
    "-header",
    "false",
    "--connector.csv.urlfile",
    StringUtils.quoteJson(urlfile.toAbsolutePath()),
    "--schema.keyspace",
    "ks1",
    "--schema.query",
    INSERT_INTO_IP_BY_COUNTRY,
    "--schema.mapping",
    IP_BY_COUNTRY_MAPPING_INDEXED
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  // Verbosity 2 prints the final metrics summary; all 24 records should succeed unbatched.
  assertThat(logs.getAllMessagesAsString())
      .contains("Records: total: 24, successful: 24, failed: 0")
      .contains("Batches: total: 24, size: 1.00 mean, 1 min, 1 max")
      .contains("Writes: total: 24, successful: 24, failed: 0");
  assertStatus(status, STATUS_OK);
  validateQueryCount(simulacron, 24, "INSERT INTO ip_by_country", LOCAL_ONE);
}
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in the project dsbulk by DataStax: class CSVEndToEndSimulacronIT, method load_errors.
/**
 * Verifies that a load completes with STATUS_COMPLETED_WITH_ERRORS when some rows fail with
 * recoverable driver errors, and that the bad records and exceptions log reflect the 4 failures.
 * Note: the {@code params} map is reused and mutated between primes, so the priming order below
 * is significant.
 */
@Test
void load_errors() throws Exception {
primeIpByCountryTable(simulacron);
Map<String, Object> params = new HashMap<>();
// Sweden succeeds: primed with an empty SuccessResult.
params.put("country_name", "Sweden");
RequestPrime prime1 = createParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY, params, new SuccessResult(emptyList(), new LinkedHashMap<>()));
simulacron.prime(new Prime(prime1));
// recoverable errors only
params.put("country_name", "France");
prime1 = createParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY, params, new ReadTimeoutResult(LOCAL_ONE, 1, 0, false));
simulacron.prime(new Prime(prime1));
params.put("country_name", "Gregistan");
prime1 = createParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY, params, new WriteTimeoutResult(LOCAL_ONE, 0, 0, WriteType.BATCH_LOG));
simulacron.prime(new Prime(prime1));
params.put("country_name", "Andybaijan");
prime1 = createParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY, params, new WriteFailureResult(ONE, 0, 0, emptyMap(), WriteType.BATCH));
simulacron.prime(new Prime(prime1));
params = new HashMap<>();
params.put("country_name", "United States");
prime1 = createParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY, params, new FunctionFailureResult("keyspace", "function", emptyList(), "bad function call"));
simulacron.prime(new Prime(prime1));
// max-retries 1 allows a single retry per retryable error.
String[] args = { "load", "-header", "false", "--connector.csv.url", StringUtils.quoteJson(CsvUtils.CSV_RECORDS_ERROR), "--driver.advanced.retry-policy.max-retries", "1", "--schema.keyspace", "ks1", "--schema.query", INSERT_INTO_IP_BY_COUNTRY, "--schema.mapping", IP_BY_COUNTRY_MAPPING_INDEXED };
ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
assertStatus(status, STATUS_COMPLETED_WITH_ERRORS);
// There are 24 rows of data, plus two extra queries from the single retries of the retryable
// timeouts. NOTE(review): the original comment said "the write timeout and the unavailable",
// but no unavailable result is primed above — presumably the read timeout (France) and write
// timeout (Gregistan) are the two retried errors; confirm against the retry policy.
validateQueryCount(simulacron, 26, "INSERT INTO ip_by_country", LOCAL_ONE);
// Four primed failures (read timeout, write timeout, write failure, function failure) should
// each produce a bad record and an entry in the errors log.
validateNumberOfBadRecords(4);
validateExceptionsLog(4, "Source:", "load-errors.log");
}
Use of com.datastax.oss.dsbulk.runner.DataStaxBulkLoader in the project dsbulk by DataStax: class CSVEndToEndSimulacronIT, method load_long_column.
/**
 * Verifies that a CSV column longer than the default 4096-character limit loads successfully
 * once {@code --connector.csv.maxCharsPerColumn} is raised to 10000.
 */
@Test
void load_long_column() {
  primeIpByCountryTable(simulacron);
  RequestPrime insertPrime = createSimpleParameterizedQuery(INSERT_INTO_IP_BY_COUNTRY);
  simulacron.prime(new Prime(insertPrime));
  // The input file contains a column exceeding the parser's default maximum length.
  String[] args = {
    "load",
    "-header",
    "false",
    "--connector.csv.url",
    StringUtils.quoteJson(CsvUtils.CSV_RECORDS_LONG),
    "--connector.csv.maxCharsPerColumn",
    "10000",
    "--schema.keyspace",
    "ks1",
    "--schema.query",
    INSERT_INTO_IP_BY_COUNTRY,
    "--schema.mapping",
    IP_BY_COUNTRY_MAPPING_INDEXED
  };
  ExitStatus status = new DataStaxBulkLoader(addCommonSettings(args)).run();
  assertStatus(status, STATUS_OK);
  validateQueryCount(simulacron, 1, "INSERT INTO ip_by_country", LOCAL_ONE);
}
Aggregations